--- /dev/null
+*.o
+*.pico
+*.so
+*.a
+*.dylib
+*.pyc
+*~
+CMakeLists.txt.user
+bin/
+build/
+docs/Doxyfile
+lib/
+tests/bin/test_pbbam
+tests/data/test_group_query/group.fofn
+tests/src/TestData.h
+
--- /dev/null
+language: cpp
+compiler:
+ - gcc
+
+before_install:
+
+ # Travis's default installs of gcc, boost, & cmake currently lag behind the minimums we need.
+ # So we need to set them up manually.
+ #
+ # - gcc 4.8 (current default on Travis is 4.7, which is no good for C++11 work)
+ # - boost 1.55
+ # - cmake 3.x
+
+ # add external repos
+ - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test # gcc
+ - sudo add-apt-repository -y ppa:boost-latest/ppa # boost
+ - sudo add-apt-repository -y ppa:george-edison55/precise-backports # cmake
+
+ # remove existing cmake install
+ - sudo apt-get remove -qq cmake cmake-data
+ - sudo apt-get autoremove -qq
+
+ # update apt
+ - sudo apt-get update -y -qq
+
+ # install
+ - sudo apt-get install -y -qq g++-4.8 boost1.55 cmake-data cmake
+
+ # make sure we're using new gcc tools
+ - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 90
+ - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 90
+ - sudo update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-4.8 90
+
+ # prep zlib
+ - sudo apt-get install -y -qq zlib1g-dev
+
+ # prep htslib
+ # Every step is chained with && so that a failed clone/build/install fails
+ # this step, instead of being masked by the exit status of the final cd.
+ - "cd .. && git clone https://github.com/PacificBiosciences/htslib.git && cd htslib && make && sudo make install && cd $TRAVIS_BUILD_DIR"
+
+ # prep GoogleTest
+ - sudo apt-get install -y -qq libgtest-dev
+
+before_script:
+ # run cmake
+ - mkdir build
+ - cd build
+ - cmake .. -DGTEST_SRC_DIR=/usr/src/gtest -DCMAKE_BUILD_TYPE=Debug
+
+script:
+ # build & test
+ - make -j 3
+ - make test
+
+branches:
+ only:
+ - master
+
+notifications:
+ recipients:
+ - dbarnett@pacb.com
+ email:
+ on_success: change
+ on_failure: always
+
--- /dev/null
+# PacBio::BAM - change log\r
+\r
+All notable changes to this project will be documented in this file.\r
+This project adheres to [Semantic Versioning](http://semver.org/). \r
+\r
+**NOTE:** The current series (0.y.z) is under initial development. Anything may\r
+change at any time. The public API should not be considered stable yet. Once we\r
+lock down a version 1.0.0, this will define a reference point & compatibility\r
+guarantees will be maintained within each major version series.\r
+\r
+## Active\r
+\r
+### Added\r
+- Default DataSet 'Version' attribute if none already present (currently 4.0.0)\r
+\r
+## [0.7.4] - 2016-11-18\r
+\r
+### Changed\r
+- Compatibility for merging BAM files no longer requires exact match of PacBioBAM\r
+version number (header @HD:pb tag). As long as both files meet the minimum \r
+supported version number, the merge is allowed.\r
+\r
+## [0.7.3] - 2016-11-11\r
+\r
+### Added\r
+- Support for S/P2-C2 chemistry and forthcoming 4.0 basecaller\r
+\r
+## [0.7.2] - 2016-11-10\r
+\r
+### Removed\r
+- SAM header version equality check for merging BAM files. PacBioBAM version \r
+number carries more meaning for PacBio data and thus will be the basis of \r
+ensuring compatible merging.\r
+\r
+## [0.7.1] - 2016-11-09\r
+\r
+### Added\r
+- (Unindexed) FASTA reader & FastaSequence data structure.\r
+- Missing unit tests for internal BAM tag access.\r
+- Chemistry data for basecaller v3.3.\r
+- Missing parsers for filtering barcode quality ("bq"), barcode forward ("bcf"), \r
+and barcode reverse ("bcr") from DataSetXML.\r
+- Integrated htslib into project.\r
+\r
+### Fixed\r
+- Reverse complement on padding base.\r
+\r
+## [0.7.0] - 2016-09-26 \r
+\r
+### Added\r
+- Clipping for CCS records\r
+\r
+### Fixed\r
+- Cached position data leaking across records while iterating.\r
+- Rolled back default pulse behavior in internal BAM API, to be backward-\r
+compatible with existing client code (for now at least). v0.6.0 introduced\r
+returning basecalled positions ONLY by default, rather than return ALL \r
+pulses. \r
+- Fixed crash when attempting to read from empty BAM/PBI files using the \r
+PbiFilter-enabled APIs.\r
+\r
+## [0.6.0] - 2016-09-13\r
+\r
+### Added\r
+- BamWriter writes to a BAM file with the target name plus a ".tmp" suffix. On\r
+successful completion (i.e. normal BamWriter destruction, not triggered by a\r
+thrown exception) the file is renamed to the actual requested filename.\r
+- PBI file creation follows the same temporary naming convention.\r
+- Support for barcode pair (forward, reverse) in DataSetXML filter.\r
+- Validation API & 'auto-validate' compile-time switch. \r
+- Added support for a batched QNAME whitelist filter in DataSet XML. Uses (new) \r
+Property name 'qname_file', with the value being the filepath containing the \r
+whitelist.\r
+- Exposed MD5 hashing to API.\r
+- Ability to remove base features from a ReadGroupInfo object.\r
+- Can construct an aggregate PbiRawData index object from a DataSet: essentially\r
+concatenates all PBI data within the dataset.\r
+- New SamWriter class to create SAM-formatted output of PacBio BAM data.\r
+- Extended APIs for accessing "internal BAM" data, including PulseBehavior\r
+switch for selecting between all pulses & basecalls only. \r
+\r
+### Fixed\r
+- Improper 'clip to reference' product for BamRecord in some cases.\r
+- Improper behavior in tag accessors (e.g. BamRecord::IPD()) on reverse strand-\r
+aligned reads (bug 31339).\r
+- Improper basecaller version parsing in ReadGroupInfo.\r
+\r
+### Changed\r
+- RecordType::POLYMERASE renamed to RecordType::ZMW to reflect changes in\r
+PacBio BAM spec v3.0.4\r
+- Refactored the 'virtual' reader classes - to match the new nomenclature,\r
+and to combine the virtual reader & composite readers behind a shared \r
+interface. The old class names still exist, as typedefs to the new ones, \r
+and the interfaces are completely source-compatible - so as not to break \r
+existing code. However, the old classes should be considered deprecated and \r
+the new ones preferred. Below is the mapping of old -> new:\r
+\r
+ VirtualPolymeraseBamRecord -> VirtualZmwBamRecord\r
+ VirtualPolymeraseReader -> ZmwReadStitcher\r
+ VirtualPolymeraseCompositeReader -> ZmwReadStitcher\r
+ ZmwWhitelistVirtualReader -> WhitelistedZmwReadStitcher\r
+\r
+\r
+## [0.5.0] - 2016-02-22\r
+\r
+### Added\r
+- Platform model tag added to read group as RG::PM\r
+- New scrap zmw type sz\r
+- pbmerge accepts DataSetXML as input - using top-level resource BAMs as input,\r
+applying filters, and generating a merged BAM. Also added FOFN support, instead\r
+of listing out BAMs as command line args.\r
+- PbiLocalContextFilter to allow filtering on subread local context.\r
+- PbiBuilder: multithreading & zlib compression-level tuning for PBI output\r
+\r
+### Fixed\r
+- Fixed mishandling of relative BAM filenames in the filename constructor for\r
+DataSet (e.g. DataSet ds("../data.bam")).\r
+\r
+## [0.4.5] - 2016-01-14\r
+\r
+### Changed\r
+- PbiFilterQuery (and any other PBI-backed query, e.g. ZmwQuery ) now throws if\r
+PBI file(s) missing instead of returning empty result.\r
+- GenomicIntervalQuery now throws if BAI file(s) missing instead of returning\r
+empty result.\r
+- BamFile will throw if file is truncated (e.g. missing the EOF block). Disable\r
+by defining PBBAM_NO_CHECK_EOF .\r
+\r
+## [0.4.4] - 2016-01-07\r
+\r
+### Added\r
+- bam2sam command line utility. The primary benefit is removing the dependency\r
+on samtools during tests, but also provides users a functioning BAM -> SAM\r
+converter in the absence of samtools.\r
+- pbmerge command line utility. Allows merging N BAM files into one, optionally\r
+creating the PBI file alongside.\r
+- Added BamRecord::Pkmean2 & Pkmid2, 2D equivalent of Pkmean/Pkmid, for internal\r
+BAMs.\r
+\r
+### Removed \r
+- samtools dependency\r
+\r
+## [0.4.3] - 2015-12-22\r
+\r
+### Added\r
+- Compile using ccache by default, if available. Can be manually disabled using\r
+-DPacBioBAM_use_ccache=OFF with cmake.\r
+- pbindexdump: command-line utility that converts PBI file data into human-\r
+readable formats. (JSON by default).\r
+\r
+### Changed\r
+- CMake option PacBioBAM_build_pbindex is being deprecated. Use\r
+PacBioBAM_build_tools instead.\r
+\r
+## [0.4.2] - 2015-12-22\r
+\r
+### Changed\r
+- BamFile::PacBioIndexExists & StandardIndexExists no longer check timestamps.\r
+Copying/moving files around can yield timestamps that are not helpful (no longer\r
+guaranteed that the .pbi will be "newer" than the .bam, even though no content\r
+changed). Added methods (e.g. bool BamFile::PacBioIndexIsNewer()) to do that\r
+lookup if needed, but it is no longer done automatically.\r
+\r
+## [0.4.1] - 2015-12-18\r
+\r
+### Added\r
+- BamRecord::HasNumPasses\r
+\r
+### Changed\r
+- VirtualPolymeraseBamRecord::VirtualRegionsTable(type) returns an empty vector\r
+of regions if none are associated with the requested type, instead of throwing.\r
+\r
+## [0.4.0] - 2015-12-15\r
+\r
+### Changed\r
+- Redesigned PbiFilter interface and backend. Previous implementation did not\r
+scale well as intermediate results were far too unwieldy. This redesign provides\r
+speedups of orders of magnitude in many cases.\r
+\r
+## [0.3.2] - 2015-12-10\r
+\r
+### Added \r
+- Support for ReadGroupInfo sequencing chemistry data.\r
+InvalidSequencingChemistryException thrown if an unsupported combination is\r
+encountered.\r
+- VirtualPolymeraseCompositeReader - for re-stitching records, across multiple\r
+resources (e.g. from DataSetXML). Reader respects DataSet filter criteria.\r
+\r
+## [0.3.1] - 2015-10-30\r
+\r
+### Added\r
+- ZmwWhitelistVirtualReader: similar to VirtualPolymeraseReader but restricts\r
+iteration to a whitelist of ZMW hole numbers, leveraging PBI index data for\r
+random-access.\r
+\r
+### Fixed\r
+- Fixed error in PBI construction, in which entire file sections (e.g.\r
+BarcodeData or MappedData) were being dropped when any one record lacked data.\r
+Correct behavior is to allow file section omission if all records lack that\r
+data type.\r
+\r
+## [0.3.0] - 2015-10-29\r
+\r
+### Fixed\r
+- Improper reporting of current offset from multi-threaded BamWriter. This had\r
+the effect of creating broken PBIs that were written alongside the BAM. Added a\r
+flush step, which incurs a performance hit, but restores correctness.\r
+\r
+## [0.2.4] - 2015-10-26\r
+\r
+### Fixed\r
+- Empty PbiFilter now returns all records, instead of filtering away all records.\r
+\r
+## [0.2.3] - 2015-10-26\r
+\r
+### Added/Fixed\r
+- Syncing DataSetXML across APIs. Primary changes include output of Version\r
+attribute ("3.0.1") on appropriate elements, as well as resolution of namespace\r
+issues.\r
+\r
+## [0.2.2] - 2015-10-22\r
+\r
+### Added\r
+- Added BAI bin calculation to BamWriter::Write, to ensure maximal compatibility\r
+with downstream tools (e.g. 'samtools index'). A new BinCalculationMode enum\r
+flag in BamWriter constructor controls whether this behavior is enabled [default]\r
+or not.\r
+\r
+## [0.2.1] - 2015-10-19\r
+\r
+### Added\r
+- Exposed the following classes to public API:\r
+ - BamReader\r
+ - BaiIndexedBamReader\r
+ - PbiIndexedBamReader\r
+ - GenomicIntervalCompositeBamReader\r
+ - PbiFilterCompositeBamReader\r
+\r
+## [0.2.0] - 2015-10-09\r
+\r
+### Changed\r
+- BAM spec v3.0.1 compliance. Previous (beta) versions of the BAM spec are not\r
+supported and will cause an exception to be thrown if encountered.\r
+- PBI lookup interface & backend, see PbiIndex.h & PbiLookupData.h for details.\r
+\r
+### Added \r
+- BamFile::PacBioIndexExists() & BamFile::StandardIndexExists() - query the\r
+existence of index files without auto-building them if they are missing, as in\r
+BamFile::Ensure*IndexExists().\r
+- GenomicInterval now accepts an htslib/samtools-style REGION string in the\r
+constructor: GenomicInterval("chr1:1000-2000"). Please note though, that pbbam\r
+uses 0-based coordinates throughout, whereas samtools expects 1-based. The above\r
+string is equivalent to "chr1:1001-2000" in samtools.\r
+- Built-in PBI filters. See PbiFilter.h & PbiFilterTypes.h for built-in filters\r
+and constructing composite filters. These can be used in conjunction with the\r
+new PbiFilterQuery, which takes a generic PbiFilter and applies that to a\r
+DataSet for iteration.\r
+- New built-in queries: BarcodeQuery, ReadAccuracyQuery, SubreadLengthQuery.\r
+These leverage the new filter API to construct a PbiFilter and apply to a\r
+DataSet.\r
+- Built-in BamRecord comparators that are STL-compatible. See Compare.h for full\r
+list. This allows for statements like the following, which sorts records by ZMW\r
+number:\r
+``` c++\r
+ vector<BamRecord> data;\r
+ std::sort(data.begin(), data.end(), Compare::Zmw());\r
+```\r
+- "exciseSoftClips" option to BamRecord::CigarData()\r
+\r
+## [0.1.0] - 2015-07-17\r
+\r
+### Changed\r
+- BAM spec v3.0b7 compliance\r
+ - Removal of 'M' as allowed CIGAR operation. Attempt to use such a CIGAR op\r
+ will throw an exception.\r
+ - Addition of IPD/PulseWidth codec version info in header\r
+ \r
+### Added\r
+- Auto-generation of UTC timestamp for DataSet objects\r
+- PbiBuilder - allows generation of PBI index data alongside generation or\r
+modification of BAM record data. This obviates the need to wait for a completed\r
+BAM, then go through the zlib decompression, etc.\r
+- Added DataSet::FromXml(string xml) to create DataSets from "raw" XML string,\r
+rather than building up using DataSet API or loading from existing file.\r
+- "pbindex" command line tool to generate ".pbi" files from BAM data. The\r
+executable is built by default, but can be disabled using the cmake option\r
+"-DPacBioBAM_build_pbindex=OFF".\r
+ \r
+### Fixed\r
+- PBI construction failing on CCS reads\r
+\r
+## [0.0.8] - 2015-07-02\r
+\r
+### Changed\r
+- Build system refactoring.\r
+\r
+## [0.0.7] - 2015-07-02\r
+\r
+### Added\r
+- PBI index lookup API. Not so much intended for client use directly, but will\r
+enable construction of higher-level semantic queries: grouping by, filtering,\r
+etc.\r
+- DataSet & PBI-aware queries (e.g. ZmwGroupQuery). More PBI-enabled queries to\r
+follow.\r
+- More flexibility in tag access. Samtools has a habit of performing a\r
+"shrink-to-fit" when it handles integer-valued tag data. Thus we cannot\r
+**guarantee** the binary type that our API will have to process. Safe\r
+conversions are allowed on integer-like data only. Under- or overflows in\r
+casting will trigger an exception. All other tag data types must be asked for\r
+explicitly, or else an exception will be raised, as before.\r
+- BamHeader::DeepCopy - allows creation of editable header data, without\r
+overwriting all shared instances\r
+\r
+### Fixed\r
+- XSD compliance for DataSet APIs.\r
+\r
+### Changed\r
+- The functionality provided by ZmwQuery (group by hole number), is now\r
+available using the ZmwGroupQuery object. The new ZmwQuery returns a single-\r
+record iterator (a la EntireFileQuery), but limited to a whitelist of requested\r
+hole numbers.\r
+\r
+### Removed\r
+- XSD non-compliant classes (e.g. ExternalDataReference)\r
+\r
+## [0.0.6] - 2015-06-07\r
+\r
+### Added\r
+\r
+- Accessor methods for pulse bam support:\r
+ - LabelQV()\r
+ - AltLabelQV()\r
+ - LabelTag()\r
+ - AltLabelTag()\r
+ - Pkmean()\r
+ - Pkmid()\r
+ - PrePulseFrames() only RC, no clipping\r
+ - PulseCallWidth() only RC, no clipping\r
+ - PulseCall() case-sensitive RC, no clipping\r
+ - IPDRaw() to avoid up and downscaling for stitching\r
+- BamRecord::ParseTagName and BamRecord::ParseTagString to convert a two \r
+ character tag string to a TagName enum and back. Allows a switch over tags.\r
+- VirtualPolymeraseReader to create VirtualPolymeraseBamRecord from a \r
+ subreads|hqregion+scraps.bam\r
+- VirtualRegion represents annotations of the polymerase reads, for adapters, \r
+ barcodes, lqregions, and hqregions.\r
+- ReadGroupInfo operator== \r
+\r
+### Fixed\r
+\r
+- Reimplemented QueryStart(int), QueryEnd(int), UpdateName(void), \r
+ ReadGroup(ReadGroupInfo&), ReadGroupId(std::string&);\r
+\r
+## [0.0.5] - 2015-05-29\r
+\r
+### Added\r
+\r
+- DataSet support. This includes XML I/O, basic dataset query/manipulation, and\r
+multi-BAM-file queries. New classes are located in <pbbam/dataset/>. DataSet-\r
+capable queries currently reside in the PacBio::BAM::staging namespace. These\r
+will be ported over to the main namespace once the support is stabilized and\r
+works seamlessly with either a single BamFile or DataSet object as input. (bug\r
+25941)\r
+- PBI support. This includes read/write raw data & building from a BamFile. The\r
+lookup API for random-access queries is under development, but the raw data is\r
+available - for creating PBI files & generating summary statistics. (bug 26025)\r
+- C# SWIG bindings, alongside existing Python and R wrappers.\r
+- LocalContextFlags support in BamRecord (bug 26623)\r
+\r
+### Fixed\r
+\r
+- BamRecord[Impl] map quality now initialized with 255 (missing) value, instead\r
+of 0. (bug 26228)\r
+- ReadGroupId calculation. (bug 25940)\r
+ \r
+## [0.0.4] - 2015-04-22\r
+\r
+### Added\r
+\r
+- This changelog. Hope it helps.\r
+- Hook to set verbosity of underlying htslib warnings.\r
+- Grouped queries. (bug 26361)\r
+\r
+### Changed\r
+\r
+- Now using exceptions instead of return codes, output parameters, etc.\r
+- Removed "messy" shared_ptrs across interface (see especially BamHeader). These\r
+are now taken care of within the API, not exposed to client code.\r
+\r
+### Removed\r
+\r
+- BamReader \r
+\r
+### Fixed\r
+\r
+- ASCII tag output. (bug 26381)\r
--- /dev/null
+########################################################################
+# CMake build script for PacBioBAM library.
+########################################################################
+
+# cmake_minimum_required() must be the first command: it establishes the
+# policy defaults that project() depends on, so it has to precede project().
+cmake_minimum_required(VERSION 3.0)
+cmake_policy(SET CMP0048 NEW) # lets us set version in project()
+project(PacBioBAM VERSION 0.7.4 LANGUAGES CXX C)
+
+# project name & version
+set(PacBioBAM_NAME pbbam)
+set(PacBioBAM_VERSION
+ "${PacBioBAM_VERSION_MAJOR}.${PacBioBAM_VERSION_MINOR}.${PacBioBAM_VERSION_PATCH}"
+)
+
+# list build-time options
+option(PacBioBAM_build_docs "Build PacBioBAM's API documentation." ON)
+option(PacBioBAM_build_tests "Build PacBioBAM's unit tests." ON)
+option(PacBioBAM_build_shared "Build PacBioBAM as shared library as well." OFF)
+option(PacBioBAM_build_tools "Build PacBioBAM command line utilities (e.g. pbindex)" ON)
+option(PacBioBAM_wrap_csharp "Build PacBioBAM with SWIG bindings for C#." OFF)
+option(PacBioBAM_wrap_python "Build PacBioBAM with SWIG bindings for Python." OFF)
+option(PacBioBAM_wrap_r "Build PacBioBAM with SWIG bindings for R." OFF)
+option(PacBioBAM_use_modbuild "Build PacBioBAM using Modular Build System." OFF)
+option(PacBioBAM_use_ccache "Build PacBioBAM using ccache, if available." ON)
+option(PacBioBAM_auto_validate "Build PacBioBAM with auto-validation enabled." OFF)
+
+if (PacBioBAM_wrap_csharp OR PacBioBAM_wrap_r OR PacBioBAM_wrap_python)
+ set(wrapping_swig TRUE)
+else()
+ set(wrapping_swig FALSE)
+endif()
+
+if(PacBioBAM_build_tests)
+ enable_testing()
+endif()
+
+# project paths
+set(PacBioBAM_RootDir ${CMAKE_CURRENT_LIST_DIR})
+set(PacBioBAM_DocsDir ${PacBioBAM_RootDir}/docs)
+set(PacBioBAM_IncludeDir ${PacBioBAM_RootDir}/include)
+set(PacBioBAM_SourceDir ${PacBioBAM_RootDir}/src)
+set(PacBioBAM_SwigSourceDir ${PacBioBAM_RootDir}/src/swig)
+set(PacBioBAM_TestsDir ${PacBioBAM_RootDir}/tests)
+set(PacBioBAM_ThirdPartyDir ${PacBioBAM_RootDir}/third-party)
+set(PacBioBAM_ToolsDir ${PacBioBAM_RootDir}/tools)
+
+# Pick the output root. When the caller has not supplied PacBioBAM_OutputDir,
+# default to this project's binary directory. A caller-supplied value is
+# treated as a modular build, which is rejected if any SWIG wrapping is on.
+if(NOT PacBioBAM_OutputDir)
+ set(PacBioBAM_OutputDir ${CMAKE_CURRENT_BINARY_DIR})
+else()
+ if(${wrapping_swig})
+ message(FATAL_ERROR "SWIG bindings not currently supported in modular build.")
+ endif()
+endif()
+set(PacBioBAM_BinDir ${PacBioBAM_OutputDir}/bin)
+set(PacBioBAM_LibDir ${PacBioBAM_OutputDir}/lib)
+
+set(GeneratedDir ${CMAKE_BINARY_DIR}/generated)
+set(GeneratedTestDataDir ${GeneratedDir}/data)
+file(MAKE_DIRECTORY ${PacBioBAM_BinDir})
+file(MAKE_DIRECTORY ${PacBioBAM_LibDir})
+file(MAKE_DIRECTORY ${GeneratedDir})
+file(MAKE_DIRECTORY ${GeneratedTestDataDir})
+
+# project configuration (keep this order)
+set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake ${CMAKE_MODULE_PATH})
+include(pbbam-ccache)
+include(pbbam-compilerflags)
+include(pbbam-libtype)
+include(pbbam-dependencies)
+
+# project components (keep this order)
+add_subdirectory(src)
+add_subdirectory(tools)
+add_subdirectory(docs)
+add_subdirectory(tests)
--- /dev/null
+# PacBio::BAM - building & integrating\r
+\r
+Detailed build instructions can be found [here](http://pbbam.readthedocs.org/en/latest/getting_started.html).\r
--- /dev/null
+Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted (subject to the limitations in the
+disclaimer below) provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of Pacific Biosciences nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
--- /dev/null
+# pbbam
+
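+[![Build Status](https://travis-ci.org/PacificBiosciences/pbbam.svg?branch=master)](https://travis-ci.org/PacificBiosciences/pbbam) [![Documentation Status](https://readthedocs.org/projects/pbbam/badge/?version=latest)](http://pbbam.readthedocs.org/en/latest/?badge=latest)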
+
+As of the 3.0 release of SMRTanalysis, PacBio is embracing the industry standard BAM
+format for (both aligned and unaligned) basecall data files. We have also formulated
+a BAM companion file format (bam.pbi) enabling fast access to a richer set of per-read
+information as well as compatibility for software built around the legacy cmp.h5 format.
+
+The **pbbam** software package provides components to create, query, & edit PacBio BAM
+files and associated indices. These components include a core C++ library, bindings for
+additional languages, and command-line utilities.
+
+### Note:
+
+This library is **not** intended to be used as a general-purpose BAM utility - all input & output BAMs must adhere to the [PacBio BAM format specification](https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/BAM.rst). Non-PacBio BAMs will cause exceptions to be thrown.
+
+## Documentation
+
+ - [Documentation Home](http://pbbam.readthedocs.org/en/latest/index.html)
+ - [Getting Started](http://pbbam.readthedocs.org/en/latest/getting_started.html)
+ - [C++ API Reference](http://pbbam.readthedocs.org/en/latest/api_reference.html)
+
+ - [Changelog](https://github.com/PacificBiosciences/pbbam/blob/master/CHANGELOG.md)
+
+## License
+
+ - [PacBio open source license](https://github.com/PacificBiosciences/pbbam/blob/master/LICENSE.txt)
+
--- /dev/null
+#
+# A CMake Module for finding and using C# (.NET and Mono).
+#
+# The following variables are set:
+# CSHARP_FOUND - set to ON if C# is found
+# CSHARP_USE_FILE - the path to the C# use file
+# CSHARP_TYPE - the type of the C# compiler (eg. ".NET" or "Mono")
+# CSHARP_VERSION - the version of the C# compiler (eg. "v4.0" or "2.10.2")
+# CSHARP_COMPILER - the path to the C# compiler executable (eg. "C:/Windows/Microsoft.NET/Framework/v4.0.30319/csc.exe" or "/usr/bin/gmcs")
+# CSHARP_INTERPRETER - the path to interpreter needed to run CSharp executables
+# CSHARP_PLATFORM - the C# target platform
+# CSHARP_SDK - the SDK commandline switch (empty for .NET, for Mono eg. "/sdk:2" or "/sdk:4")
+#
+# This file is based on the work of GDCM:
+# http://gdcm.svn.sf.net/viewvc/gdcm/trunk/CMake/FindCSharp.cmake
+# Copyright (c) 2006-2010 Mathieu Malaterre <mathieu.malaterre@gmail.com>
+#
+
+# TODO: ADD ABILITY TO SELECT WHICH C# COMPILER eg. .NET or Mono (if both exist). For the moment, .NET is selected above Mono.
+
+# Make sure find package macros are included
+include( FindPackageHandleStandardArgs )
+
+unset( CSHARP_COMPILER CACHE )
+unset( CSHARP_INTERPRETER CACHE )
+unset( CSHARP_TYPE CACHE )
+unset( CSHARP_VERSION CACHE )
+unset( CSHARP_FOUND CACHE )
+
+# By default use anycpu platform, allow the user to override
+set( CSHARP_PLATFORM "anycpu" CACHE STRING "C# target platform: x86, x64, anycpu, or itanium" )
+# The pattern is anchored so only the four exact names are accepted (an
+# unanchored pattern would also pass e.g. "x86_64"). The expansion is quoted
+# so that an empty value still forms a valid if() expression and reaches the
+# error message below.
+if( NOT "${CSHARP_PLATFORM}" MATCHES "^(x86|x64|anycpu|itanium)$" )
+ message( FATAL_ERROR "The C# target platform '${CSHARP_PLATFORM}' is not valid. Please enter one of the following: x86, x64, anycpu, or itanium" )
+endif( )
+
+# Locate a C# toolchain: on Windows prefer the .NET Framework SDK and fall
+# back to Mono if it is not found; on every other platform look for Mono.
+# The else() is left without an argument because the branch covers all
+# non-WIN32 platforms, not only UNIX.
+if( WIN32 )
+ find_package( DotNetFrameworkSdk )
+ if( NOT CSHARP_DOTNET_FOUND )
+ find_package( Mono )
+ endif( )
+else( )
+ find_package( Mono )
+endif( )
+
+# Publish the selected toolchain through the generic CSHARP_* cache variables.
+# .NET takes precedence over Mono when both were found. If neither was found,
+# nothing is set here.
+if( CSHARP_DOTNET_FOUND )
+ set( CSHARP_TYPE ".NET" CACHE STRING "Using the .NET compiler" )
+ set( CSHARP_VERSION ${CSHARP_DOTNET_VERSION} CACHE STRING "C# .NET compiler version" FORCE )
+ set( CSHARP_COMPILER ${CSHARP_DOTNET_COMPILER_${CSHARP_DOTNET_VERSION}} CACHE STRING "Full path to .NET compiler" FORCE )
+ # .NET executables run directly, so the interpreter is deliberately empty.
+ set( CSHARP_INTERPRETER "" CACHE INTERNAL "Interpreter not required for .NET" FORCE )
+elseif( CSHARP_MONO_FOUND )
+ set( CSHARP_TYPE "Mono" CACHE STRING "Using the Mono compiler" )
+ set( CSHARP_VERSION ${CSHARP_MONO_VERSION} CACHE STRING "C# Mono compiler version" FORCE )
+ set( CSHARP_COMPILER ${CSHARP_MONO_COMPILER_${CSHARP_MONO_VERSION}} CACHE STRING "Full path to Mono compiler" FORCE )
+ set( CSHARP_INTERPRETER ${CSHARP_MONO_INTERPRETER_${CSHARP_MONO_VERSION}} CACHE STRING "Full path to Mono interpreter" FORCE )
+ # Not FORCEd, so a user-provided SDK switch in the cache is kept.
+ set( CSHARP_SDK "/sdk:4.5" CACHE STRING "C# Mono SDK commandline switch (e.g. /sdk:2, /sdk:4, /sdk:5)" )
+endif( )
+
+# Handle WIN32 specific issues
+# A compiler that is a batch script is prefixed with "call". Only a real
+# ".bat" extension (any case) triggers this, rather than any path that
+# merely contains the letters "bat".
+if ( WIN32 )
+ if ( CSHARP_COMPILER MATCHES "\\.[Bb][Aa][Tt]$" )
+ set( CSHARP_COMPILER "call ${CSHARP_COMPILER}" )
+ endif ( )
+endif( )
+
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(CSharp DEFAULT_MSG CSHARP_TYPE CSHARP_VERSION CSHARP_COMPILER)
+
+mark_as_advanced( CSHARP_TYPE CSHARP_VERSION CSHARP_COMPILER CSHARP_INTERPRETER CSHARP_PLATFORM CSHARP_SDK )
+
+# Set the USE_FILE path
+# http://public.kitware.com/Bug/view.php?id=7757
+get_filename_component( current_list_path ${CMAKE_CURRENT_LIST_FILE} PATH )
+set( CSHARP_USE_FILE ${current_list_path}/UseCSharp.cmake )
--- /dev/null
+# Set paths and vars for .NET compilers
+# This is hand-rolled because I had problems with the one from SimpleITK
+
+#
+# The following variables are set:
+# CSHARP_DOTNET_FOUND
+# CSHARP_DOTNET_COMPILER_${version} eg. "CSHARP_DOTNET_COMPILER_v4.0.30319"
+# CSHARP_DOTNET_VERSION eg. "v4.0.30319"
+# CSHARP_DOTNET_VERSIONS eg. "v2.0.50727, v3.5, v4.0.30319"
+# DotNetFrameworkSdk_USE_FILE
+#
+# CSHARP_PROJECT_BUILDER (xbuild/msbuild)
+
+# Fixed install location that this module probes for the .NET Framework.
+set(framework_dir "C:/Windows/Microsoft.NET/Framework")
+
+# The one framework version this module looks for, and the tools derived from it.
+set(CSHARP_DOTNET_VERSION "v4.0.30319")
+set(CSHARP_DOTNET_VERSIONS "")
+set(CSHARP_DOTNET_COMPILER_${CSHARP_DOTNET_VERSION} "${framework_dir}/${CSHARP_DOTNET_VERSION}/csc.exe")
+set(CSHARP_PROJECT_BUILDER "${framework_dir}/${CSHARP_DOTNET_VERSION}/MSBuild.exe")
+
+# Report "found" only when the compiler executable is actually present.
+set(CSHARP_DOTNET_FOUND 0)
+if(EXISTS "${CSHARP_DOTNET_COMPILER_${CSHARP_DOTNET_VERSION}}")
+ set(CSHARP_DOTNET_FOUND 1)
+endif()
+
+# Set USE_FILE
+get_filename_component( current_list_path ${CMAKE_CURRENT_LIST_FILE} PATH )
+set( DotNetFrameworkSdk_USE_FILE ${current_list_path}/UseDotNetFrameworkSdk.cmake )
\ No newline at end of file
--- /dev/null
+#
+# A CMake Module for finding Mono.
+#
+# The following variables are set:
+# CSHARP_MONO_FOUND
+# CSHARP_MONO_COMPILER_${version} eg. "CSHARP_MONO_COMPILER_2.10.2"
+# CSHARP_MONO_INTERPRETER_${version} eg. "CSHARP_MONO_INTERPRETER_2.10.2"
+# CSHARP_MONO_VERSION eg. "2.10.2"
+# CSHARP_MONO_VERSIONS eg. "2.10.2, 2.6.7"
+#
+# Additional references can be found here:
+# http://www.mono-project.com/Main_Page
+# http://www.mono-project.com/CSharp_Compiler
+# http://mono-project.com/FAQ:_Technical (How can I tell where the Mono runtime is installed)
+#
+# This file is based on the work of GDCM:
+# http://gdcm.svn.sf.net/viewvc/gdcm/trunk/CMake/FindMono.cmake
+# Copyright (c) 2006-2010 Mathieu Malaterre <mathieu.malaterre@gmail.com>
+#
+
+# Validate a previously chosen Mono version.
+# csharp_mono_valid starts as 1 and is cleared only when an earlier run set
+# CSHARP_MONO_FOUND but no CSHARP_MONO_COMPILER_<version> variable exists for
+# the currently selected CSHARP_MONO_VERSION.
+set( csharp_mono_valid 1 )
+if( DEFINED CSHARP_MONO_FOUND )
+ # The Mono compiler has already been found
+ # It may have been reset by the user, verify it is correct
+ if( NOT DEFINED CSHARP_MONO_COMPILER_${CSHARP_MONO_VERSION} )
+ # Remember the rejected value for the error message before overwriting it.
+ set( csharp_mono_version_user ${CSHARP_MONO_VERSION} )
+ set( csharp_mono_valid 0 )
+ set( CSHARP_MONO_FOUND 0 )
+ # Replace the cached version with a sentinel and list the known versions in its help text.
+ set( CSHARP_MONO_VERSION "CSHARP_MONO_VERSION-NOTVALID" CACHE STRING "C# Mono compiler version, choices: ${CSHARP_MONO_VERSIONS}" FORCE )
+ message( FATAL_ERROR "The C# Mono version '${csharp_mono_version_user}' is not valid. Please enter one of the following: ${CSHARP_MONO_VERSIONS}" )
+ endif( NOT DEFINED CSHARP_MONO_COMPILER_${CSHARP_MONO_VERSION} )
+endif( DEFINED CSHARP_MONO_FOUND )
+
+unset( CSHARP_MONO_VERSIONS CACHE ) # Clear versions
+if( WIN32 )
+ # Search for Mono on Win32 systems
+ # See http://mono-project.com/OldReleases and http://www.go-mono.com/mono-downloads/download.html
+ set( csharp_mono_bin_dirs )
+ set( csharp_mono_search_hints
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.11.2;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.9;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.8;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.7;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.6;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.5;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.4;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.3;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.2;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10.1;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.10;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.8;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.6.7;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.6.4;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.6.3;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.6.1;SdkInstallRoot]/bin"
+ "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono\\2.6;SdkInstallRoot]/bin"
+ )
+ foreach( csharp_mono_search_hint ${csharp_mono_search_hints} )
+ get_filename_component( csharp_mono_bin_dir "${csharp_mono_search_hint}" ABSOLUTE )
+ if ( EXISTS "${csharp_mono_bin_dir}" )
+ set( csharp_mono_bin_dirs ${csharp_mono_bin_dirs} ${csharp_mono_bin_dir} )
+ endif ( EXISTS "${csharp_mono_bin_dir}" )
+ endforeach( csharp_mono_search_hint )
+ # TODO: Use HKEY_LOCAL_MACHINE\Software\Novell\Mono\DefaultCLR to specify default version
+ # get_filename_component( test "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Novell\\Mono;DefaultCLR]" NAME )
+
+ # For each Mono bin directory collected above: pick a compiler wrapper script,
+ # then publish version / compiler / interpreter information through the cache.
+ foreach ( csharp_mono_bin_dir ${csharp_mono_bin_dirs} )
+ # Normalize backslashes to forward slashes
+ string( REPLACE "\\" "/" csharp_mono_bin_dir ${csharp_mono_bin_dir} )
+ # Compiler script preference order: dmcs, then gmcs, then mcs
+ if (EXISTS "${csharp_mono_bin_dir}/dmcs.bat")
+ set( csharp_mono_executable "${csharp_mono_bin_dir}/dmcs.bat")
+ elseif (EXISTS "${csharp_mono_bin_dir}/gmcs.bat")
+ set( csharp_mono_executable "${csharp_mono_bin_dir}/gmcs.bat")
+ elseif (EXISTS "${csharp_mono_bin_dir}/mcs.bat")
+ set( csharp_mono_executable "${csharp_mono_bin_dir}/mcs.bat")
+ endif (EXISTS "${csharp_mono_bin_dir}/dmcs.bat")
+
+ # NOTE(review): csharp_mono_valid is not assigned anywhere in this loop; unless it
+ # is defined elsewhere, nothing inside this block ever runs - confirm.
+ if( csharp_mono_valid )
+ # Extract version number (eg. 2.10.2)
+ string(REGEX MATCH "([0-9]*)([.])([0-9]*)([.]*)([0-9]*)" csharp_mono_version_temp ${csharp_mono_bin_dir})
+ set( CSHARP_MONO_VERSION ${csharp_mono_version_temp} CACHE STRING "C# Mono compiler version" )
+ mark_as_advanced( CSHARP_MONO_VERSION )
+
+ # Add variable holding executable
+ set( CSHARP_MONO_COMPILER_${csharp_mono_version_temp} ${csharp_mono_executable} CACHE STRING "C# Mono compiler ${csharp_mono_version_temp}" FORCE )
+ mark_as_advanced( CSHARP_MONO_COMPILER_${csharp_mono_version_temp} )
+
+ # Set interpreter
+ if (EXISTS "${csharp_mono_bin_dir}/mono.exe")
+ set( CSHARP_MONO_INTERPRETER_${csharp_mono_version_temp} "${csharp_mono_bin_dir}/mono.exe" CACHE STRING "C# Mono interpreter ${csharp_mono_version_temp}" FORCE )
+ mark_as_advanced( CSHARP_MONO_INTERPRETER_${csharp_mono_version_temp} )
+ endif (EXISTS "${csharp_mono_bin_dir}/mono.exe")
+ endif( csharp_mono_valid )
+
+ # Create a list of supported compiler versions
+ # NOTE(review): this runs even when the block above was skipped, and since the list
+ # is a cache entry a later configure takes the append branch again unless the
+ # variable is unset earlier in the file - confirm this is intended.
+ if( NOT DEFINED CSHARP_MONO_VERSIONS )
+ set( CSHARP_MONO_VERSIONS "${csharp_mono_version_temp}" CACHE STRING "Available C# Mono compiler versions" FORCE )
+ else( NOT DEFINED CSHARP_MONO_VERSIONS )
+ set( CSHARP_MONO_VERSIONS "${CSHARP_MONO_VERSIONS}, ${csharp_mono_version_temp}" CACHE STRING "Available C# Mono versions" FORCE )
+ endif( NOT DEFINED CSHARP_MONO_VERSIONS )
+ mark_as_advanced( CSHARP_MONO_VERSIONS )
+
+ # We found at least one Mono compiler version
+ set( CSHARP_MONO_FOUND 1 CACHE INTERNAL "Boolean indicating if C# Mono was found" )
+ endforeach( csharp_mono_bin_dir )
+
+else( UNIX )
+ # Search for Mono on non-Win32 systems
+ # Executable names to try, in order of preference
+ set( chsarp_mono_names
+   "mcs" "mcs.exe"
+   "dmcs" "dmcs.exe"
+   "smcs" "smcs.exe"
+   "gmcs" "gmcs.exe"
+ )
+ # Directories probed in addition to the default search locations
+ set( csharp_mono_paths "/usr/bin/" "/usr/local/bin/" "/usr/lib/mono/2.0" "/opt/novell/mono/bin" )
+ # find_program stores its result in the cache; the entry is removed again below
+ find_program( csharp_mono_compiler NAMES ${chsarp_mono_names} PATHS ${csharp_mono_paths} )
+
+ # Publish the Mono results only when find_program located a compiler on disk
+ if( EXISTS "${csharp_mono_compiler}" )
+   # Determine version by asking the mono interpreter
+   find_program(
+     csharp_mono_interpreter # variable is added to the cache, we remove it below
+     NAMES mono
+     PATHS ${csharp_mono_paths}
+   )
+   if ( EXISTS "${csharp_mono_interpreter}" )
+     execute_process(
+       COMMAND ${csharp_mono_interpreter} -V
+       OUTPUT_VARIABLE csharp_mono_version_string
+     )
+     # Take the first dotted number (eg. 2.10.2) from the output. The input is quoted so
+     # that empty output does not leave string() short of arguments.
+     string( REGEX MATCH "([0-9]*)([.])([0-9]*)([.]*)([0-9]*)" csharp_mono_version_temp "${csharp_mono_version_string}" )
+   endif ( )
+
+   # We found Mono compiler.
+   # CSHARP_MONO_VERSION has to be cached before any variable is named after it;
+   # otherwise a first configure would create "CSHARP_MONO_INTERPRETER_" with no suffix.
+   set( CSHARP_MONO_VERSION ${csharp_mono_version_temp} CACHE STRING "C# Mono compiler version" )
+   mark_as_advanced( CSHARP_MONO_VERSION )
+
+   if ( EXISTS "${csharp_mono_interpreter}" )
+     set( CSHARP_MONO_INTERPRETER_${CSHARP_MONO_VERSION} ${csharp_mono_interpreter} CACHE STRING "C# Mono interpreter ${csharp_mono_version_temp}" FORCE )
+     mark_as_advanced( CSHARP_MONO_INTERPRETER_${CSHARP_MONO_VERSION} )
+   endif ( )
+   # Remove temp variable from cache
+   unset( csharp_mono_interpreter CACHE )
+
+   set( CSHARP_MONO_COMPILER_${CSHARP_MONO_VERSION} ${csharp_mono_compiler} CACHE STRING "C# Mono compiler ${CSHARP_MONO_VERSION}" FORCE )
+   mark_as_advanced( CSHARP_MONO_COMPILER_${CSHARP_MONO_VERSION} )
+   set( CSHARP_MONO_VERSIONS ${CSHARP_MONO_VERSION} CACHE STRING "Available C# Mono compiler versions" FORCE )
+   mark_as_advanced( CSHARP_MONO_VERSIONS )
+   set( CSHARP_MONO_FOUND 1 CACHE INTERNAL "Boolean indicating if C# Mono was found" )
+
+   # Assume xbuild is just xbuild.
+   set(CSHARP_PROJECT_BUILDER "xbuild")
+
+ endif( )
+
+ # Remove temp variable from cache
+ unset( csharp_mono_compiler CACHE )
+
+endif( WIN32 )
+
+# Report the found versions, if any
+if( CSHARP_MONO_FOUND )
+    message( STATUS "Found the following C# Mono versions: ${CSHARP_MONO_VERSIONS}" )
+endif( )
+
+# Set USE_FILE: UseMono.cmake is looked up in the directory holding this module
+get_filename_component( current_list_path "${CMAKE_CURRENT_LIST_FILE}" PATH )
+set( Mono_USE_FILE "${current_list_path}/UseMono.cmake" )
--- /dev/null
+
+#
+# - This module locates an installed R distribution.
+#
+# Defines the following:
+#
+# R_INCLUDE_DIR - Path to R include directory
+# R_LIBRARIES - Path to R library
+# R_LIBRARY_BASE - R library located by find_library (R_LIBRARIES is set to this value)
+# R_COMMAND - Path to R command
+# RSCRIPT_EXECUTABLE - Path to Rscript command
+#
+
+
+# Make sure find package macros are included
+include( FindPackageHandleStandardArgs )
+
+# Look up the R executables with app-bundle searching switched off, then restore
+# whatever CMAKE_FIND_APPBUNDLE was set to before.
+set(TEMP_CMAKE_FIND_APPBUNDLE ${CMAKE_FIND_APPBUNDLE})
+set(CMAKE_FIND_APPBUNDLE "NEVER")
+find_program(R_COMMAND R DOC "R executable.")
+if(R_COMMAND)
+    # Ask R itself for its home directory
+    execute_process(
+        WORKING_DIRECTORY .
+        COMMAND ${R_COMMAND} RHOME
+        OUTPUT_VARIABLE R_BASE_DIR
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+    set(R_HOME ${R_BASE_DIR} CACHE PATH "R home directory obtained from R RHOME")
+    mark_as_advanced(R_HOME)
+endif()
+
+find_program(RSCRIPT_EXECUTABLE Rscript DOC "Rscript executable.")
+
+set(CMAKE_FIND_APPBUNDLE ${TEMP_CMAKE_FIND_APPBUNDLE})
+
+# R.h gets installed in all sorts of places -
+# ubuntu: /usr/share/R/include, RHEL/Fedora: /usr/include/R/R.h
+find_path(
+    R_INCLUDE_DIR R.h
+    PATHS ${R_INCLUDE_DIR_HINT} /usr/local/lib /usr/local/lib64 /usr/share /usr/include ${R_BASE_DIR}
+    PATH_SUFFIXES include R R/include
+    DOC "Path to file R.h"
+)
+find_library(
+    R_LIBRARY_BASE R
+    PATHS ${R_BASE_DIR}
+    PATH_SUFFIXES /lib
+    DOC "R library (example libR.a, libR.dylib, etc.)."
+)
+
+set(R_LIBRARIES ${R_LIBRARY_BASE})
+mark_as_advanced(RSCRIPT_EXECUTABLE R_LIBRARIES R_INCLUDE_DIR R_COMMAND R_LIBRARY_BASE)
+
+# The header and the executable are always mandatory
+set( _REQUIRED_R_VARIABLES R_INCLUDE_DIR R_COMMAND )
+
+if( APPLE )
+    # On linux platform some times the libR.so is not available, however
+    # on apple a link error results if the library is linked.
+    list( APPEND _REQUIRED_R_VARIABLES R_LIBRARIES R_LIBRARY_BASE )
+endif()
+
+find_package_handle_standard_args(R DEFAULT_MSG ${_REQUIRED_R_VARIABLES} )
--- /dev/null
+include(CMakeParseArguments)
+
+# create_pbbam_tool(TARGET <name> SOURCES <file>...)
+#
+# Adds an executable target <name> built from the given sources, links it against
+# pbbam and places the binary in ${PacBioBAM_BinDir}.
+function(create_pbbam_tool)
+
+    # parse args: one single-value keyword (TARGET), one multi-value keyword (SOURCES)
+    cmake_parse_arguments(tool "" "TARGET" "SOURCES" ${ARGN})
+
+    # include paths needed by the tool sources
+    include_directories(
+        ${ToolsCommonDir}         # shared tool code
+        ${GeneratedDir}           # generated version headers
+        ${PacBioBAM_INCLUDE_DIRS} # pbbam/htslib includes
+    )
+
+    # create executable
+    add_executable(${tool_TARGET} ${tool_SOURCES})
+    target_link_libraries(${tool_TARGET} pbbam)
+    set_target_properties(${tool_TARGET} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PacBioBAM_BinDir})
+
+endfunction()
--- /dev/null
+# CMake Module for finding and using C# (.NET and Mono).
+#
+# The following global variables are assumed to exist:
+# CSHARP_SOURCE_DIRECTORY - path to C# sources
+# CSHARP_BINARY_DIRECTORY - path to place resultant C# binary files
+#
+# The following variables are set:
+# CSHARP_TYPE - the type of the C# compiler (eg. ".NET" or "Mono")
+# CSHARP_COMPILER - the path to the C# compiler executable (eg. "C:/Windows/Microsoft.NET/Framework/v4.0.30319/csc.exe")
+# CSHARP_VERSION - the version number of the C# compiler (eg. "v4.0.30319")
+#
+# The following macros are defined:
+# CSHARP_ADD_EXECUTABLE( name references [files] [output_dir] ) - Define C# executable with the given name
+# CSHARP_ADD_LIBRARY( name references [files] [output_dir] ) - Define C# library with the given name
+#
+# Examples:
+# CSHARP_ADD_EXECUTABLE( MyExecutable "" "Program.cs" )
+# CSHARP_ADD_EXECUTABLE( MyExecutable "ref1.dll ref2.dll" "Program.cs File1.cs" )
+# CSHARP_ADD_EXECUTABLE( MyExecutable "ref1.dll;ref2.dll" "Program.cs;File1.cs" )
+#
+# This file is based on the work of GDCM:
+# http://gdcm.svn.sf.net/viewvc/gdcm/trunk/CMake/UseCSharp.cmake
+# Copyright (c) 2006-2010 Mathieu Malaterre <mathieu.malaterre@gmail.com>
+#
+
+# TODO: ADD SUPPORT FOR LINK LIBRARIES
+
+# Warn (without aborting) when no C# compiler has been located
+if( NOT CSHARP_COMPILER )
+    message( WARNING "A C# compiler executable was not found on your system" )
+endif( )
+
+# Include the USE_FILE that belongs to the detected compiler type
+if( CSHARP_TYPE MATCHES ".NET" )
+    include( ${DotNetFrameworkSdk_USE_FILE} )
+elseif( CSHARP_TYPE MATCHES "Mono" )
+    include( ${Mono_USE_FILE} )
+endif( )
+
+# Define a C# library named `name`; all remaining arguments are forwarded
+# unchanged to CSHARP_ADD_PROJECT with type "library".
+macro( CSHARP_ADD_LIBRARY name )
+ CSHARP_ADD_PROJECT( "library" ${name} ${ARGN} )
+endmacro( CSHARP_ADD_LIBRARY )
+
+# Define a C# executable named `name`; all remaining arguments are forwarded
+# unchanged to CSHARP_ADD_PROJECT with type "exe".
+macro( CSHARP_ADD_EXECUTABLE name )
+ CSHARP_ADD_PROJECT( "exe" ${name} ${ARGN} )
+endmacro( CSHARP_ADD_EXECUTABLE )
+
+# Private macro
+#
+# CSHARP_ADD_PROJECT( type name [args...] )
+#   type - "library" (output <name>.dll) or "exe" (output <name>.exe)
+#   name - name of the custom target and base name of the output file
+#   args - an argument ending in ".dll" is passed to the compiler as a /reference:
+#          option; any other argument is treated as a source: an existing path, a
+#          path relative to CSHARP_SOURCE_DIRECTORY, or a wildcard pattern.
+#          Arguments matching none of these are dropped without a message.
+#
+# This is a macro, so the helper variables it sets (refs, sources, sources_dep,
+# output, ...) remain set in the caller's scope.
+macro( CSHARP_ADD_PROJECT type name )
+    set( refs "/reference:System.dll" )
+    set( sources )
+    set( sources_dep )
+
+    if( ${type} MATCHES "library" )
+        set( output "dll" )
+    elseif( ${type} MATCHES "exe" )
+        set( output "exe" )
+    endif( )
+
+    # Step through each argument
+    foreach( it ${ARGN} )
+        # Anchor on the extension so that a source such as "dllhelper.cs" is not
+        # mistaken for a reference
+        if( ${it} MATCHES "[.]dll$" )
+            # Argument is a dll, add reference
+            list( APPEND refs /reference:${it} )
+        else( )
+            # Argument is a source file
+            if( EXISTS ${it} )
+                list( APPEND sources ${it} )
+                list( APPEND sources_dep ${it} )
+            elseif( EXISTS ${CSHARP_SOURCE_DIRECTORY}/${it} )
+                list( APPEND sources ${CSHARP_SOURCE_DIRECTORY}/${it} )
+                list( APPEND sources_dep ${CSHARP_SOURCE_DIRECTORY}/${it} )
+            elseif( ${it} MATCHES "[*]" )
+                # For dependencies, we need to expand wildcards; the compiler itself
+                # still receives the unexpanded pattern
+                FILE( GLOB it_glob ${it} )
+                list( APPEND sources ${it} )
+                list( APPEND sources_dep ${it_glob} )
+            endif( )
+        endif ( )
+    endforeach( )
+
+    # Check we have at least one source
+    list( LENGTH sources_dep sources_length )
+    if ( ${sources_length} LESS 1 )
+        MESSAGE( SEND_ERROR "No C# sources were specified for ${type} ${name}" )
+    endif ()
+    list( SORT sources_dep )
+
+    # Perform platform specific actions.
+    # The list is passed quoted: string(REPLACE) concatenates separate input arguments,
+    # which would glue several source paths into one (and fail on an empty list).
+    if (WIN32)
+        string( REPLACE "/" "\\" sources "${sources}" )
+    else ()
+        string( REPLACE "\\" "/" sources "${sources}" )
+    endif ()
+
+    # Add custom target and command
+    MESSAGE( STATUS "Adding C# ${type} ${name}: '${CSHARP_COMPILER} /t:${type} /out:${name}.${output} /platform:${CSHARP_PLATFORM} ${CSHARP_SDK} ${refs} ${sources}'" )
+    add_custom_command(
+        COMMENT "Compiling C# ${type} ${name}: '${CSHARP_COMPILER} /t:${type} /out:${name}.${output} /platform:${CSHARP_PLATFORM} ${CSHARP_SDK} ${refs} ${sources}'"
+        OUTPUT ${CSHARP_BINARY_DIRECTORY}/${name}.${output}
+        COMMAND ${CSHARP_COMPILER}
+        ARGS /t:${type} /out:${name}.${output} /platform:${CSHARP_PLATFORM} ${CSHARP_SDK} ${refs} ${sources}
+        WORKING_DIRECTORY ${CSHARP_BINARY_DIRECTORY}
+        DEPENDS ${sources_dep}
+    )
+    add_custom_target(
+        ${name} ALL
+        DEPENDS ${CSHARP_BINARY_DIRECTORY}/${name}.${output}
+        SOURCES ${sources_dep}
+    )
+endmacro( CSHARP_ADD_PROJECT )
--- /dev/null
+#
+# A CMake Module for using the .NET C# compiler.
+#
+# The following variables are set:
+# (none)
+#
+# Additional references can be found here:
+# http://www.mono-project.com/Main_Page
+# http://www.mono-project.com/CSharp_Compiler
+#
+# This file is based on the work of GDCM:
+# http://gdcm.svn.sf.net/viewvc/gdcm/trunk/CMake/FindMono.cmake
+# Copyright (c) 2006-2010 Mathieu Malaterre <mathieu.malaterre@gmail.com>
+#
+
+# Report which .NET compiler version is in use (CSHARP_DOTNET_VERSION is read, not set, here)
+message( STATUS "Using .NET compiler version ${CSHARP_DOTNET_VERSION}" )
\ No newline at end of file
--- /dev/null
+#
+# A CMake Module for using Mono.
+#
+# The following variables are set:
+# (none)
+#
+# Additional references can be found here:
+# http://www.mono-project.com/Main_Page
+# http://www.mono-project.com/CSharp_Compiler
+#
+# This file is based on the work of GDCM:
+# http://gdcm.svn.sf.net/viewvc/gdcm/trunk/CMake/FindMono.cmake
+# Copyright (c) 2006-2010 Mathieu Malaterre <mathieu.malaterre@gmail.com>
+#
+
+# Report which Mono compiler version is in use (CSHARP_MONO_VERSION is read, not set, here)
+message( STATUS "Using Mono compiler version ${CSHARP_MONO_VERSION}" )
--- /dev/null
+
+# Optional ccache support: when requested and the program is found, launch every
+# compile and link rule through ccache.
+if(PacBioBAM_use_ccache)
+    find_program(CCACHE_FOUND ccache)
+    if(CCACHE_FOUND)
+        foreach(launch_rule RULE_LAUNCH_COMPILE RULE_LAUNCH_LINK)
+            set_property(GLOBAL PROPERTY ${launch_rule} ccache)
+        endforeach()
+    endif()
+endif()
--- /dev/null
+
+include(CheckCXXCompilerFlag)
+
+# C++11 check & enabling
+#
+# CMake before 3.1 has no CMAKE_CXX_STANDARD, so the language level has to be
+# requested through raw compiler flags there.
+if (CMAKE_VERSION VERSION_LESS "3.1")
+    if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+        # -stdlib only selects the standard library implementation; -std is what
+        # actually switches the language to C++11
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -stdlib=libc++") # clang
+    else()
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") # gcc
+    endif()
+else() # 3.1+
+    set(CMAKE_CXX_STANDARD 11)
+    set(CMAKE_CXX_STANDARD_REQUIRED ON)
+endif()
+
+# shared CXX flags for src & tests: start from "all warnings" in the compiler's own spelling
+if (NOT MSVC)
+    set(PacBioBAM_CXX_FLAGS "-Wall")
+else()
+    set(PacBioBAM_CXX_FLAGS "/Wall")
+endif()
+
+# Probe the optional warning suppressions first, then append the accepted ones.
+# NOTE: -Wno-unused-local-typedefs used to quash clang warnings w/ Boost
+check_cxx_compiler_flag("-Wno-unused-local-typedefs" HAS_NO_UNUSED_LOCAL_TYPEDEFS)
+check_cxx_compiler_flag("-Wno-sign-compare" HAS_NO_SIGN_COMPARE)
+
+if(HAS_NO_UNUSED_LOCAL_TYPEDEFS)
+    set(PacBioBAM_CXX_FLAGS "${PacBioBAM_CXX_FLAGS} -Wno-unused-local-typedefs")
+endif()
+if(HAS_NO_SIGN_COMPARE)
+    set(PacBioBAM_CXX_FLAGS "${PacBioBAM_CXX_FLAGS} -Wno-sign-compare")
+endif()
+
+# Windows-style filepath resolution is switched on through a global definition.
+# It has to be added this early (not just in the C# SWIG wrapper).
+if(PacBioBAM_wrap_csharp AND WIN32)
+    add_definitions(-DPBBAM_WIN_FILEPATHS)
+endif()
+
+# OS X: leave @rpath out of install names for now, because it makes the SWIG
+# tests fail.
+if(APPLE)
+    set(CMAKE_MACOSX_RPATH OFF)
+endif()
--- /dev/null
+
+# External dependencies. Boost, zlib and htslib are only searched for / built when
+# the corresponding variables have not already been provided.
+
+# pthreads
+find_package(Threads REQUIRED)
+
+# boost
+if(NOT Boost_INCLUDE_DIRS)
+    find_package(Boost REQUIRED)
+endif()
+
+# Winsock for htslib on Windows
+if(WIN32)
+    set(SOCKET_LIBRARIES "ws2_32")
+endif()
+
+# zlib: needs both the include dirs and the libraries
+if(NOT (ZLIB_INCLUDE_DIRS AND ZLIB_LIBRARIES))
+    find_package(ZLIB REQUIRED)
+endif()
+
+# htslib: fall back to the bundled third-party copy
+if(NOT (HTSLIB_INCLUDE_DIRS AND HTSLIB_LIBRARIES))
+    add_subdirectory(third-party/htslib external/htslib)
+endif()
--- /dev/null
+
+# determine if we need a shared lib
+#
+# A shared library is built when explicitly requested or when SWIG wrappers are
+# being generated. wrapping_swig is referenced by name so that if() evaluates the
+# variable itself; expanding it with ${} leaves a dangling "OR" (a configure
+# error) whenever the variable is unset or empty.
+if(PacBioBAM_build_shared OR wrapping_swig)
+    set(BUILD_SHARED_LIBS ON)
+    set(htslib_build_shared ON CACHE BOOL "force htslibConfig to export proper library name")
+    set(PB_LIB_MODE SHARED)
+    set(PB_LIB_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
+else()
+    set(BUILD_SHARED_LIBS OFF)
+    set(PB_LIB_MODE STATIC)
+    set(PB_LIB_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
+endif()
+
+if(WIN32)
+    # Limit the number of DLLs we will have to bundle: link the GCC runtime libraries
+    # statically into shared libraries and modules. Each flag variable is extended from
+    # its own previous value so existing module flags are kept and nothing is duplicated.
+    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -static-libgcc -static-libstdc++")
+    set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
+endif()
+
+
+
--- /dev/null
+find_package(Doxygen)
+
+# The 'doc' target is only defined when Doxygen was found
+if(DOXYGEN_FOUND)
+    # Substitute the @VAR@ placeholders of the Doxyfile template
+    configure_file(
+        ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in
+        ${PacBioBAM_DocsDir}/Doxyfile
+        @ONLY
+    )
+    add_custom_target(doc
+        COMMAND ${DOXYGEN_EXECUTABLE} ${PacBioBAM_DocsDir}/Doxyfile
+        WORKING_DIRECTORY ${PacBioBAM_DocsDir}
+        COMMENT "Generating API documentation with Doxygen"
+        VERBATIM
+    )
+endif()
--- /dev/null
+# Doxyfile 1.6.3
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME = @PacBioBAM_NAME@
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER = @PacBioBAM_VERSION@
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY = @PacBioBAM_DocsDir@
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
+# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak,
+# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
+
+OUTPUT_LANGUAGE = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF = "The $name class" \
+ "The $name widget" \
+ "The $name file" \
+ is \
+ provides \
+ specifies \
+ contains \
+ represents \
+ a \
+ an \
+ the
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH = @PacBioBAM_IncludeDir@
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful if your file system
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
+# (thus requiring an explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE = 1
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES =
+
+#samSpecURL=http://samtools.sourceforge.net/SAM1.pdf
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for
+# Java. For instance, namespaces will be presented as packages, qualified
+# scopes will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources only. Doxygen will then generate output that is more tailored for
+# Fortran.
+
+OPTIMIZE_FOR_FORTRAN = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for
+# VHDL.
+
+OPTIMIZE_OUTPUT_VHDL = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it parses.
+# With this tag you can assign which parser to use for a given extension.
+# Doxygen has a built-in mapping, but you can override or extend it using this tag.
+# The format is ext=language, where ext is a file extension, and language is one of
+# the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP,
+# Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat
+# .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran),
+# use: inc=Fortran f=C. Note that for custom extensions you also need to set
+# FILE_PATTERNS otherwise the files are not read by doxygen.
+
+EXTENSION_MAPPING =
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
+# func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT = YES
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
+# Doxygen will parse them like normal C++ but will assume all classes use public
+# instead of private inheritance when no explicit protection keyword is present.
+
+SIP_SUPPORT = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate getter
+# and setter methods for a property. Setting this option to YES (the default)
+# will make doxygen replace the get and set methods by a property in the
+# documentation. This will only work if the methods are indeed getting or
+# setting a simple type. If this is not the case, or you want to show the
+# methods anyway, you should set this option to NO.
+
+IDL_PROPERTY_SUPPORT = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING = YES
+
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
+# is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically
+# be useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+
+TYPEDEF_HIDES_STRUCT = NO
+
+# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to
+# determine which symbols to keep in memory and which to flush to disk.
+# When the cache is full, less often used symbols will be written to disk.
+# For small to medium size projects (<1000 input files) the default value is
+# probably good enough. For larger projects a too small cache size can cause
+# doxygen to be busy swapping symbols to and from disk most of the time
+# causing a significant performance penalty.
+# If the system has enough physical memory increasing the cache will improve the
+# performance by keeping more symbols in memory. Note that the value works on
+# a logarithmic scale so increasing the size by one will roughly double the
+# memory usage. The cache size is given by this formula:
+# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols
+
+SYMBOL_CACHE_SIZE = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES = NO
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base
+# name of the file that contains the anonymous namespace. By default
+# anonymous namespaces are hidden.
+
+EXTRACT_ANON_NSPACES = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES = NO
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES = YES
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
+# will list include files with double quotes in the documentation
+# rather than with sharp brackets.
+
+FORCE_LOCAL_INCLUDES = NO
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
+# will sort the (brief and detailed) documentation of class members so that
+# constructors and destructors are listed first. If set to NO (the default)
+# the constructors will appear in the respective orders defined by
+# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
+# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
+# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
+# hierarchy of group names into alphabetical order. If set to NO (the default)
+# the group names will appear in their defined order.
+
+SORT_GROUP_NAMES = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is NO.
+
+SHOW_DIRECTORIES = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
+# Folder Tree View (if specified). The default is YES.
+
+SHOW_FILES = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page. This will remove the Namespaces entry from the Quick Index
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by
+# doxygen. The layout file controls the global structure of the generated output files
+# in an output format independent way. To create the layout file that represents
+# doxygen's defaults, run doxygen with the -l option. You can optionally specify a
+# file name after the option, if omitted DoxygenLayout.xml will be used as the name
+# of the layout file.
+
+LAYOUT_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT = @PacBioBAM_IncludeDir@
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# the list of possible encodings.
+
+INPUT_ENCODING = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS = *.c \
+ *.cc \
+ *.cxx \
+ *.cpp \
+ *.c++ \
+ *.d \
+ *.java \
+ *.ii \
+ *.ixx \
+ *.ipp \
+ *.i++ \
+ *.inl \
+ *.h \
+ *.hh \
+ *.hxx \
+ *.hpp \
+ *.h++ \
+ *.idl \
+ *.odl \
+ *.cs \
+ *.php \
+ *.php3 \
+ *.inc \
+ *.m \
+ *.mm \
+ *.dox \
+ *.py \
+ *.f90 \
+ *.f \
+ *.vhd \
+ *.vhdl
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# be excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE = @PacBioBAM_IncludeDir@/pbbam/internal
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix filesystem feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS = pugi, PacBio::BAM::internal
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH = examples
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS = *
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output. If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
+# is applied to all files.
+
+FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code. Otherwise they will link to the documentation.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
+HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET =
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting
+# this to NO can help when comparing the output of multiple runs.
+
+HTML_TIMESTAMP = YES
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded. For this to work a browser that supports
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
+# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
+
+HTML_DYNAMIC_SECTIONS = YES
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information.
+
+GENERATE_DOCSET = NO
+
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
+# can be grouped.
+
+DOCSET_FEEDNAME = "Doxygen generated docs"
+
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID = org.doxygen.Project
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
+# content.
+
+CHM_INDEX_ENCODING =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER
+# are set, an additional index file will be generated that can be used as input for
+# Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated
+# HTML documentation.
+
+GENERATE_QHP = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
+# be used to specify the file name of the resulting .qch file.
+# The path specified is relative to the HTML output folder.
+
+QCH_FILE =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#namespace
+
+QHP_NAMESPACE = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+
+QHP_VIRTUAL_FOLDER = doc
+
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add.
+# For more information please see
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
+
+QHP_CUST_FILTER_NAME =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">Qt Help Project / Custom Filters</a>.
+
+QHP_CUST_FILTER_ATTRS =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's
+# filter section matches.
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">Qt Help Project / Filter Attributes</a>.
+
+QHP_SECT_FILTER_ATTRS =
+
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
+# be used to specify the location of Qt's qhelpgenerator.
+# If non-empty doxygen will try to run qhelpgenerator on the generated
+# .qhp file.
+
+QHG_LOCATION =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
+# will be generated, which together with the HTML files, form an Eclipse help
+# plugin. To install this plugin and make it available under the help contents
+# menu in Eclipse, the contents of the directory containing the HTML and XML
+# files needs to be copied into the plugins directory of eclipse. The name of
+# the directory within the plugins directory should be the same as
+# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
+# the help appears.
+
+GENERATE_ECLIPSEHELP = NO
+
+# A unique identifier for the eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have
+# this name.
+
+ECLIPSE_DOC_ID = org.doxygen.Project
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE = 4
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to YES, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW = NO
+
+# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
+# and Class Hierarchy pages using a tree view instead of an ordered list.
+
+USE_INLINE_TREES = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH = 250
+
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE = 10
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box
+# for the HTML output. The underlying search engine uses javascript
+# and DHTML and should work on any modern browser. Note that when using
+# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
+# (GENERATE_DOCSET) there is already a search function so this one should
+# typically be disabled. For large projects the javascript based search engine
+# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
+
+SEARCHENGINE = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a PHP enabled web server instead of at the web client
+# using Javascript. Doxygen will generate the search PHP script and index
+# file to put on the web server. The advantage of the server
+# based approach is that it scales better to large projects and allows
+# full text search. The disadvantage is that it is more difficult to set up
+# and does not have live searching capabilities.
+
+SERVER_BASED_SEARCH = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+# Note that when enabling USE_PDFLATEX this option is only used for
+# generating bitmaps for formulas in the HTML output, but not in the
+# Makefile that is written to the output directory.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE = a4wide
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES = NO
+
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include
+# source code with syntax highlighting in the LaTeX output.
+# Note that which sources are shown also depends on other settings
+# such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output.
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML = YES
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader. This is useful
+# if you want to understand what is going on. On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
+# in the INCLUDE_PATH (see below) will be searched if a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse
+# the parser if not removed.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS = NO
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option is superseded by the HAVE_DOT option below. This is only a
+# fallback. It is recommended to install and use dot, since it yields more
+# powerful graphs.
+
+CLASS_DIAGRAMS = NO
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT = NO
+
+# By default doxygen will write a font called FreeSans.ttf to the output
+# directory and reference it in all dot files that doxygen generates. This
+# font does not include all possible unicode characters however, so when you need
+# these (or just want a differently looking font) you can specify the font name
+# using DOT_FONTNAME. You need to make sure dot is able to find the font,
+# which can be done by putting it in a standard location or by setting the
+# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
+# containing the font.
+
+DOT_FONTNAME = FreeSans
+
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
+# The default size is 10pt.
+
+DOT_FONTSIZE = 10
+
+# By default doxygen will tell dot to use the output directory to look for the
+# FreeSans.ttf font (which doxygen will put there itself). If you specify a
+# different font using DOT_FONTNAME you can set the path where dot
+# can find it using this tag.
+
+DOT_FONTPATH =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct groups dependencies
+
+GROUP_GRAPHS = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH = NO
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will generate a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lie further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
+# a graph (i.e. they become hard to read).
+
+DOT_TRANSPARENT = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP = YES
--- /dev/null
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+PAPER =
+BUILDDIR = build
+SOURCEDIR = source
+
+# Internal variables.
+PAPEROPT_a4 = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) $(SOURCEDIR)
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) $(SOURCEDIR)
+
+# Targets that name an action rather than a file; declaring them phony keeps a
+# stray file or directory of the same name from suppressing the recipe.
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext fig basefig
+
+help:
+ @echo "Please use \`make <target>' where <target> is one of"
+ @echo " html to make standalone HTML files"
+ @echo " dirhtml to make HTML files named index.html in directories"
+ @echo " singlehtml to make a single large HTML file"
+ @echo " pickle to make pickle files"
+ @echo " json to make JSON files"
+ @echo " htmlhelp to make HTML files and a HTML help project"
+ @echo " qthelp to make HTML files and a qthelp project"
+ @echo " devhelp to make HTML files and a Devhelp project"
+ @echo " epub to make an epub"
+ @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+ @echo " latexpdf to make LaTeX files and run them through pdflatex"
+ @echo " text to make text files"
+ @echo " man to make manual pages"
+ @echo " texinfo to make Texinfo files"
+ @echo " info to make Texinfo files and run them through makeinfo"
+ @echo " gettext to make PO message catalogs"
+ @echo " changes to make an overview of all changed/added/deprecated items"
+ @echo " linkcheck to check all external links for integrity"
+ @echo " doctest to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+ -rm -rf $(BUILDDIR)/*
+
+html: basefig MANY_CLUSTER.png
+ $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+ $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+ $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+ @echo
+ @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+ $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+ @echo
+ @echo "Build finished; now you can process the pickle files."
+
+json:
+ $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+ @echo
+ @echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+ $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+ @echo
+ @echo "Build finished; now you can run HTML Help Workshop with the" \
+ ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+ $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+ @echo
+ @echo "Build finished; now you can run "qcollectiongenerator" with the" \
+ ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+ @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pbtoolkits.qhcp"
+ @echo "To view the help file:"
+ @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pbtoolkits.qhc"
+
+devhelp:
+ $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+ @echo
+ @echo "Build finished."
+ @echo "To view the help file:"
+ @echo "# mkdir -p $$HOME/.local/share/devhelp/pbtoolkits"
+ @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pbtoolkits"
+ @echo "# devhelp"
+
+epub:
+ $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+ @echo
+ @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo
+ @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+ @echo "Run \`make' in that directory to run these through (pdf)latex" \
+ "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through pdflatex..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+ $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+ @echo
+ @echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+ $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+ @echo
+ @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo
+ @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+ @echo "Run \`make' in that directory to run these through makeinfo" \
+ "(use \`make info' here to do that automatically)."
+
+# Build the Texinfo sources, then run makeinfo on them through the Makefile
+# that Sphinx generates in $(BUILDDIR)/texinfo.
+# The sub-make is invoked via $(MAKE) (as the latexpdf target does) so that
+# command-line flags, the jobserver and the selected make binary propagate.
+info:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo "Running Texinfo files through makeinfo..."
+	$(MAKE) -C $(BUILDDIR)/texinfo info
+	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+ $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+ @echo
+ @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+ $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+ @echo
+ @echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+ $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+ @echo
+ @echo "Link check complete; look for any errors in the above output " \
+ "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+ $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+ @echo "Testing of doctests in the sources finished, look at the " \
+ "results in $(BUILDDIR)/doctest/output.txt."
+
+# Render the dependency graphs with Graphviz `dot`:
+#   1. render $(SOURCEDIR)/dependencies.dot as PNG;
+#   2. derive sparse_dependencies.dot from it by dropping every line that
+#      contains `"pbsmrtpipe" ->` or `> "pbcore"` and replacing the first
+#      "All" on each line with "Sparse";
+#   3. render that sparse graph to $(SOURCEDIR)/sparse_dependencies.png.
+# NOTE(review): $@ expands to the target name, so step 1 writes its PNG to
+# $(SOURCEDIR)/basefig (no .png extension) -- confirm this is intended.
+basefig:
+	dot -Tpng $(SOURCEDIR)/dependencies.dot > $(SOURCEDIR)/$@
+	grep -v "\"pbsmrtpipe\" ->" $(SOURCEDIR)/dependencies.dot \
+	    | grep -v "> \"pbcore\"" \
+	    | sed 's/All/Sparse/' > $(SOURCEDIR)/sparse_dependencies.dot
+	dot -Tpng $(SOURCEDIR)/sparse_dependencies.dot \
+	    > $(SOURCEDIR)/sparse_dependencies.png
+
+# Pattern rule for any <name>.png target (built after basefig):
+# take sparse_dependencies.dot, drop every line matching the stem $* (the
+# target name without ".png") or containing a literal "?", retitle
+# "Sparse dependencies" to "Module bundles", and render the result with dot
+# into $(SOURCEDIR)/<name>.png.
+%.png: basefig
+	grep -v $* $(SOURCEDIR)/sparse_dependencies.dot | \
+	    grep -v \? | sed 's/Sparse dependencies/Module bundles/' | \
+	    dot -Tpng > $(SOURCEDIR)/$@
+
--- /dev/null
+// Iterate over the records returned by a BarcodeQuery for barcode 42.
+// The asserts show what each record is expected to satisfy: it carries
+// barcode data, and 42 is its forward or its reverse barcode.
+
+// using C++11 range-based for loop
+BarcodeQuery query(42, dataset);
+for (const BamRecord& r : query) {
+    assert(r.HasBarcodes());
+    assert(r.BarcodeForward() == 42 || r.BarcodeReverse() == 42);
+}
+
+// OR
+
+// using iterators directly
+BarcodeQuery query(42, dataset);
+auto iter = query.cbegin();
+auto end = query.cend();
+for (; iter != end; ++iter) {
+    assert(iter->HasBarcodes());
+    assert(iter->BarcodeForward() == 42 || iter->BarcodeReverse() == 42);
+}
--- /dev/null
+// sort on increasing ZMW hole number
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::Zmw());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::AlignedEnd());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::AlignedStart());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::AlignedStrand());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::BarcodeForward());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::BarcodeQuality());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::BarcodeReverse());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::FullName());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::LocalContextFlag());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::MapQuality());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::MovieName());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::NumDeletedBases());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::NumInsertedBases());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::NumMatches());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::NumMismatches());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::QueryEnd());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::QueryStart());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReadAccuracy());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReadGroupId());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReadGroupNumericId());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReferenceEnd());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReferenceId());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReferenceName());
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::ReferenceStart());
--- /dev/null
+// Look up the Compare::Type that corresponds to an operator string.
+Compare::Type type = Compare::TypeFromOperator("!=");
+assert(type == Compare::NOT_EQUAL);
--- /dev/null
+// Convert a Compare::Type value to its printable name.
+string name = Compare::TypeToName(Compare::LESS_THAN);
+assert(name == "Compare::LESS_THAN");  // compare (==), do not assign (=)
--- /dev/null
+std::vector<BamRecord> records;
+std::sort(records.begin(), records.end(), Compare::Zmw());
--- /dev/null
+// using C++11 range-based for loop
+EntireFileQuery query(dataset);
+for (const BamRecord& record : query) {
+ // ... do stuff ...
+}
+
+// OR
+
+// using iterators
+EntireFileQuery query(dataset);
+auto iter = query.cbegin();
+auto end = query.cend();
+for (; iter != end; ++iter) {
+ // ... do stuff ...
+}
--- /dev/null
+EntireFileQuery query("foo.bam");
+for (const BamRecord& record : query) {
+ // do stuff
+}
--- /dev/null
+EntireFileQuery query("foo.bam");
+for (BamRecord& record : query) {
+ // ok to modify 'record' here
+}
--- /dev/null
+// using C++11 range-based for loop
+GenomicIntervalQuery query(GenomicInterval("chr1:1000-2000"), dataset);
+for (const BamRecord& record : query) {
+ // ... do stuff ...
+}
+
+// OR
+
+// using iterators directly
+GenomicIntervalQuery query(GenomicInterval("chr1:1000-2000"), dataset);
+auto iter = query.cbegin();
+auto end = query.cend();
+for (; iter != end; ++iter) {
+ // ... do stuff ...
+}
+
--- /dev/null
+// Reuse a single query object across several intervals: set the interval of
+// interest with Interval(), then iterate over the query again.
+// 'intervals' is assumed to be a caller-provided container of GenomicInterval.
+DataSet ds("data.xml");
+GenomicIntervalQuery query(GenomicInterval(), ds);
+for (const GenomicInterval& interval : intervals) {
+    query.Interval(interval);
+    for (const BamRecord& record : query) {
+        // do stuff
+    }
+}
\ No newline at end of file
--- /dev/null
+PbiFilterQuery query(PbiAlignedEndFilter{3000, Compare::GREATER_THAN});
+for (const BamRecord& record : query) {
+ assert(record.AlignedEnd() > 3000);
+}
--- /dev/null
+PbiFilterQuery query(PbiAlignedLengthFilter{1000, Compare::GREATER_THAN});
+for (const BamRecord& record : query) {
+ assert((record.AlignedEnd() - record.AlignedStart()) > 1000);
+}
--- /dev/null
+PbiFilterQuery query(PbiAlignedStartFilter{3000, Compare::GREATER_THAN});
+for (const BamRecord& record : query) {
+ assert(record.AlignedStart() > 3000);
+}
--- /dev/null
+PbiFilterQuery query(PbiAlignedStrandFilter{Strand::FORWARD});
+for (const BamRecord& record : query) {
+ assert(record.AlignedStrand() == Strand::FORWARD);
+}
+
--- /dev/null
+// single value
+PbiFilter filter{ PbiBarcodeFilter{17} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ const auto barcodes = record.Barcodes();
+ assert(barcodes.first == 17 || barcodes.second == 17);
+}
+
+// whitelist
+vector<int16_t> whitelist = { 50, 100 };
+PbiFilter filter{ PbiBarcodeFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ const auto barcodes = record.Barcodes();
+ assert(barcodes.first == 50 || barcodes.second == 50 ||
+ barcodes.first == 100 || barcodes.second == 100);
+}
--- /dev/null
+// single value
+PbiFilter filter{ PbiBarcodeForwardFilter{50} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.BarcodeForward() == 50);
+}
+
+// whitelist
+vector<int16_t> whitelist = { 50, 100 };
+PbiFilter filter{ PbiBarcodeForwardFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.BarcodeForward() == 50 || record.BarcodeForward() == 100);
+}
+
--- /dev/null
+PbiFilter filter{ PbiBarcodeQualityFilter{42, Compare::GREATER_THAN_EQUAL} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.BarcodeQuality() >= 42);
+}
--- /dev/null
+// single value
+PbiFilter filter{ PbiBarcodeReverseFilter{50} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.BarcodeReverse() == 50);
+}
+
+// whitelist
+vector<int16_t> whitelist = { 50, 100 };
+PbiFilter filter{ PbiBarcodeReverseFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.BarcodeReverse() == 50 || record.BarcodeReverse() == 100);
+}
+
--- /dev/null
+PbiFilter filter{ PbiBarcodesFilter{17, 18} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.BarcodeForward() == 17 &&
+ record.BarcodeReverse() == 18);
+}
--- /dev/null
+// To simply create a PBI file from BAM, the following is the easiest method:
+//
+#include <pbbam/BamFile.h>
+#include <pbbam/PbiFile.h>
+
+BamFile bamFile("data.bam");
+PbiFile::CreateFrom(bamFile);
+
+
+// However if you need to perform additional operations while reading the BAM file,
+// you can do something like the following:
+//
+{
+    BamFile bamFile("data.bam");
+    PbiBuilder builder(bamFile.PacBioIndexFilename(),
+                       bamFile.Header().Sequences().size());
+    BamReader reader(bamFile);
+    BamRecord b;
+    int64_t offset = reader.VirtualTell(); // first record's vOffset
+    while (reader.GetNext(b)) {
+
+        // store PBI record entry & get next record's vOffset
+        builder.AddRecord(b, offset);
+        offset = reader.VirtualTell();
+
+        // ... additional stuff as needed ...
+    }
+
+} // <-- PBI data will only be written here, as PbiBuilder goes out of scope
+
--- /dev/null
+BamWriter writer(...);
+PbiBuilder pbiBuilder(...);
+int64_t vOffset;
+BamRecord record;
+while (...) {
+
+ // ... populate record data ...
+
+ // write record to BAM and add PBI entry
+ writer.Write(record, &vOffset);
+ pbiBuilder.AddRecord(record, vOffset);
+}
--- /dev/null
+// setup filter
+PbiFilter filter;
+filter.Add(PbiZmwFilter(42));
+filter.Add(PbiReadAccuracyFilter(0.9, Compare::GREATER_THAN_EQUAL));
+
+// using C++11 range-based for loop
+PbiFilterQuery query(filter, dataset);
+for (const BamRecord& r : query) {
+ assert(r.HoleNumber() == 42);
+ assert(r.ReadAccuracy() >= 0.9);
+}
+
+// OR
+
+// using iterators directly
+PbiFilterQuery query(filter, dataset);
+auto iter = query.cbegin();
+auto end = query.cend();
+for (; iter != end; ++iter) {
+ assert(iter->HoleNumber() == 42);
+ assert(iter->ReadAccuracy() >= 0.9);
+}
--- /dev/null
+// (f1 && f2) || f3
+
+PbiFilter f1;
+PbiFilter f2;
+PbiFilter intersect_f1_f2 = PbiFilter::Intersection(f1, f2);
+
+PbiFilter f3;
+PbiFilter final = PbiFilter::Union(intersect_f1_f2, f3);
--- /dev/null
+/// Minimal example of a user-defined PBI filter.
+///
+/// The only member is Accepts(); this placeholder accepts every record.
+struct MyCustomFilter
+{
+    /// Decides whether the record at \p row of \p index passes the filter.
+    ///
+    /// A real implementation would look up the data for that row, perform
+    /// whatever calculations are necessary, and return whether the record
+    /// meets the filter criteria.
+    bool Accepts(const PbiRawData& index, const size_t row) const
+    {
+        return true;  // placeholder: nothing is rejected
+    }
+};
+
+// use in composite filters: a custom filter is added the same way as a built-in one
+PbiFilter f;
+f.Add(PbiMovieNameFilter("foo"));
+f.Add(MyCustomFilter());
+
+// pass directly to PbiFilterQuery
+PbiFilterQuery query(MyCustomFilter(), "foo.bam");
+for (const BamRecord& record : query) {
+    // ... do stuff ...
+}
--- /dev/null
+bool Accepts(const PbiRawData& index, const size_t row) const;
--- /dev/null
+PbiFilter result{ PbiFilter::INTERSECT };
+result.Add(filters);
+return result;
--- /dev/null
+PbiFilter result{ PbiFilter::INTERSECT };
+result.Add(std::move(filters));
+return result;
--- /dev/null
+PbiFilter result{ PbiFilter::UNION };
+result.Add(filters);
+return result;
--- /dev/null
+PbiFilter result{ PbiFilter::UNION };
+result.Add(std::move(filters));
+return result;
--- /dev/null
+// single value
+PbiFilter filter{ PbiIdentityFilter{ 0.5, Compare::GREATER_THAN_EQUAL } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ // ... at least 50% of record was aligned ...
+}
--- /dev/null
+
+// --------------------
+// has adapter_before
+// --------------------
+
+PbiFilter filter{ PbiLocalContextFilter{LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ const bool hasAdapterBefore = (record.LocalContextFlags() & LocalContextFlags::ADAPTER_BEFORE) != 0;
+ assert(hasAdapterBefore);
+}
+
+// ----------------------------------
+// has any adapters, barcodes, etc.
+// ----------------------------------
+
+PbiFilter filter{ PbiLocalContextFilter{LocalContextFlags::NO_LOCAL_CONTEXT, Compare::NOT_EQUAL} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ const bool hasContext = (record.LocalContextFlags() != LocalContextFlags::NO_LOCAL_CONTEXT);
+ assert(hasContext);
+}
--- /dev/null
+PbiFilter filter{ PbiMapQualityFilter{75, Compare::GREATER_THAN_EQUAL} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.MapQuality() >= 75);
+}
--- /dev/null
+// single value: keep only records whose movie name is "foo"
+PbiFilter filter{ PbiMovieNameFilter{ "foo" } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.MovieName() == "foo");
+}
+
+// whitelist: keep records whose movie name matches any listed name
+vector<string> whitelist = { "foo", "bar" };
+PbiFilter filter{ PbiMovieNameFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+    assert(record.MovieName() == "foo" || record.MovieName() == "bar");
+}
--- /dev/null
+PbiFilter filter{ PbiNumDeletedBasesFilter{50, Compare::LESS_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.NumDeletedBases() < 50);
+}
+
--- /dev/null
+PbiFilter filter{ PbiNumInsertedBasesFilter{50, Compare::LESS_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.NumInsertedBases() < 50);
+}
+
--- /dev/null
+PbiFilter filter{ PbiNumMatchesFilter{2000, Compare::GREATER_THAN_EQUAL} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.NumMatches() >= 2000);
+}
+
--- /dev/null
+PbiFilter filter{ PbiNumMismatchesFilter{500, Compare::LESS_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.NumMismatches() < 500);
+}
+
--- /dev/null
+PbiFilter filter{ PbiQueryEndFilter{3000, Compare::GREATER_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.QueryEnd() > 3000);
+}
--- /dev/null
+PbiFilter filter{ PbiQueryLengthFilter{2000, Compare::GREATER_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert( (record.QueryEnd() - record.QueryStart()) > 2000 );
+}
--- /dev/null
+// single value
+PbiFilter filter{ PbiQueryNameFilter{ "movie_1/42/100_200" } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.FullName() == "movie_1/42/100_200");
+}
+
+// whitelist
+vector<string> whitelist = { "movie_1/42/100_200", "movie_3/24/300_500" };
+PbiFilter filter{ PbiQueryNameFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.FullName() == "movie_1/42/100_200" ||
+ record.FullName() == "movie_3/24/300_500");
+}
--- /dev/null
+PbiFilter filter{ PbiQueryStartFilter{3000, Compare::GREATER_THAN} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.QueryStart() > 3000);
+}
--- /dev/null
+PbiFilter filter{ PbiReadAccuracyFilter{0.8, Compare::GREATER_THAN_EQUAL} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReadAccuracy() >= 0.8);
+}
--- /dev/null
+// -------------------------
+// numeric ID
+// -------------------------
+
+// single value
+PbiFilter filter{ PbiReadGroupFilter{ 2458765 } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReadGroupNumericId() == 2458765);
+}
+
+// whitelist
+vector<int32_t> whitelist = { 2458765, -32143 };
+PbiFilter filter{ PbiReadGroupFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReadGroupNumericId() == 2458765 ||
+ record.ReadGroupNumericId() == -32143);
+}
+
+// -------------------------
+// printable ID
+// -------------------------
+
+// single value
+PbiFilter filter{ PbiReadGroupFilter{ "12B33F00" } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReadGroupId() == "12B33F00");
+}
+
+// whitelist
+vector<string> whitelist = { "12B33F00", "123ABC77" };
+PbiFilter filter{ PbiReadGroupFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReadGroupId() == "12B33F00" ||
+ record.ReadGroupId() == "123ABC77");
+}
+
+
+// -------------------------
+// read group
+// -------------------------
+
+BamFile file("foo.bam");
+BamHeader header = file.Header();
+assert(header.ReadGroups().size() > 1);
+
+// single value
+PbiFilter filter{ PbiReadGroupFilter{ header.ReadGroups()[0] } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReadGroup() == header.ReadGroups()[0]);
+}
+
+// whitelist
+vector<ReadGroupInfo> whitelist = { header.ReadGroups()[0], header.ReadGroups()[1] };
+PbiFilter filter{ PbiReadGroupFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReadGroup() == header.ReadGroups()[0] ||
+ record.ReadGroup() == header.ReadGroups()[1]);
+}
--- /dev/null
+PbiFilter filter{ PbiReferenceEndFilter{ 2000 } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReferenceEnd() == 2000);
+}
--- /dev/null
+// single value
+PbiFilter filter{ PbiReferenceIdFilter{ 4 } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReferenceId() == 4);
+}
+
+// whitelist
+vector<int32_t> whitelist = { 0, 1 };
+PbiFilter filter{ PbiReferenceIdFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReferenceId() == 0 ||
+ record.ReferenceId() == 1);
+}
+
--- /dev/null
+// single value
+PbiFilter filter{ PbiReferenceNameFilter{ "chr1" } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReferenceName() == "chr1");
+}
+
+// whitelist
+vector<string> whitelist = { "chr1", "chr5" };
+PbiFilter filter{ PbiReferenceNameFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReferenceName() == "chr1" ||
+ record.ReferenceName() == "chr5");
+}
--- /dev/null
+PbiFilter filter{ PbiReferenceStartFilter{ 2000 } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.ReferenceStart() == 2000);
+}
--- /dev/null
+// single value
+PbiFilter filter{ PbiZmwFilter{ 4000 } };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.HoleNumber() == 4000);
+}
+
+// whitelist
+vector<int32_t> whitelist = { 4000, 8000 };
+PbiFilter filter{ PbiZmwFilter{whitelist} };
+PbiFilterQuery query(filter);
+for (const BamRecord& record : query) {
+ assert(record.HoleNumber() == 4000 ||
+ record.HoleNumber() == 8000);
+}
+
--- /dev/null
+// using C++11 range-based for loop
+ReadAccuracyQuery query(0.9, Compare::GREATER_THAN_EQUAL, dataset);
+for (const BamRecord& r : query) {
+ assert(r.ReadAccuracy() >= 0.9);
+}
+
+// OR
+
+// using iterators directly
+ReadAccuracyQuery query(0.9, Compare::GREATER_THAN_EQUAL, dataset);
+auto iter = query.cbegin();
+auto end = query.cend();
+for (; iter != end; ++iter) {
+ assert(iter->ReadAccuracy() >= 0.9);
+}
--- /dev/null
+// using C++11 range-based for loop
+SubreadLengthQuery query(500, Compare::GREATER_THAN_EQUAL, dataset);
+for (const BamRecord& r : query) {
+ assert((r.QueryEnd() - r.QueryStart()) >= 500);
+}
+
+// OR
+
+// using iterators directly
+SubreadLengthQuery query(500, Compare::GREATER_THAN_EQUAL, dataset);
+auto iter = query.cbegin();
+auto end = query.cend();
+for (; iter != end; ++iter) {
+ assert((iter->QueryEnd() - iter->QueryStart()) >= 500);
+}
--- /dev/null
+// One-step construction
+//
+// This is useful in situations that require a const Tag.
+//
+const auto t = Tag('A', TagModifier::ASCII_CHAR);
+
+// or two-step construction
+auto t = Tag('A');
+t.Modifier(TagModifier::ASCII_CHAR);
+
--- /dev/null
+vector<int32_t> zmws = { ... };
+WhitelistedZmwReadStitcher reader(zmws, "primary.bam", "scraps.bam");
+while(reader.HasNext()) {
+ auto virtualRecord = reader.Next();
+ // ... do stuff ...
+}
--- /dev/null
+// Returns true when every record in 'group' reports the same hole number
+// as the first record; an empty group is treated as trivially equal.
+bool allHoleNumbersEqual(const vector<BamRecord>& group)
+{
+    if (group.empty())
+        return true;
+
+    // Compare every record after the first against the first one's value.
+    const auto expected = group.front().HoleNumber();
+    for (auto it = group.cbegin() + 1; it != group.cend(); ++it) {
+        if (it->HoleNumber() != expected)
+            return false;
+    }
+    return true;
+}
+
+vector<int32_t> whitelist = { 50, 100 };
+ZmwGroupQuery query(whitelist, dataset);
+for(const vector<BamRecord>& group : query) {
+
+ assert(allHoleNumbersEqual(group));
+
+ for (const BamRecord& record : group) {
+ assert(record.HoleNumber() == 50 ||
+ record.HoleNumber() == 100);
+ }
+}
--- /dev/null
+vector<int32_t> whitelist = { 50, 100 };
+ZmwQuery query(whitelist, dataset);
+for (const BamRecord& record : query) {
+ assert(record.HoleNumber() == 50 ||
+ record.HoleNumber() == 100);
+}
--- /dev/null
+Read : singleInsertion2
+Reference : lambda_NEB3011
+
+Read-length : 49
+Concordance : 0.96
+
+5210 : GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGG : 5249
+ |||||||| ||||||||||||||||||| |||||||||||
+ 0 : GGCTGCAG-GTACAGCGGTCAGGAGGCCAATTGATGCCGG : 39
+
+5249 : ACTGGCTGAT : 5259
+ ||||||||||
+ 39 : ACTGGCTGAT : 49
--- /dev/null
+<Filters>
+ <Filter>
+ <Properties>
+ <Property /> # A
+ <Property /> # B
+ </Properties>
+ </Filter>
+ <Filter>
+ <Properties>
+ <Property /> # C
+ <Property /> # D
+ </Properties>
+ </Filter>
+</Filters>
--- /dev/null
+Accuracy
+========
+
+.. code-block:: cpp
+
+ #include <pbbam/Accuracy.h>
+
+.. doxygenclass:: PacBio::BAM::Accuracy
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+AlignmentPrinter
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/AlignmentPrinter.h>
+
+.. doxygenclass:: PacBio::BAM::AlignmentPrinter
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+AlignmentSet
+============
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::AlignmentSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+BaiIndexedBamReader
+===================
+
+.. code-block:: cpp
+
+ #include <pbbam/BaiIndexedBamReader.h>
+
+.. doxygenclass:: PacBio::BAM::BaiIndexedBamReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+BamFile
+=======
+
+.. code-block:: cpp
+
+ #include <pbbam/BamFile.h>
+
+.. doxygenclass:: PacBio::BAM::BamFile
+ :members:
+ :protected-members:
+ :undoc-members:
--- /dev/null
+BamHeader
+=========
+
+.. code-block:: cpp
+
+ #include <pbbam/BamHeader.h>
+
+.. doxygenclass:: PacBio::BAM::BamHeader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+BamReader
+=========
+
+.. code-block:: cpp
+
+ #include <pbbam/BamReader.h>
+
+.. doxygenclass:: PacBio::BAM::BamReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+BamRecord
+=========
+
+.. code-block:: cpp
+
+ #include <pbbam/BamRecord.h>
+
+.. doxygenenum:: PacBio::BAM::ClipType
+
+.. doxygenenum:: PacBio::BAM::RecordType
+
+.. doxygenenum:: PacBio::BAM::FrameEncodingType
+
+.. doxygenclass:: PacBio::BAM::BamRecord
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+BamRecordBuilder
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/BamRecordBuilder.h>
+
+.. doxygenclass:: PacBio::BAM::BamRecordBuilder
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+BamRecordImpl
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/BamRecordImpl.h>
+
+.. doxygenclass:: PacBio::BAM::BamRecordImpl
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+BamRecordView
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/BamRecord.h>
+
+.. doxygenclass:: PacBio::BAM::BamRecordView
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+BamTagCodec
+===========
+
+.. code-block:: cpp
+
+ #include <pbbam/BamTagCodec.h>
+
+.. doxygenclass:: PacBio::BAM::BamTagCodec
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+BamWriter
+=========
+
+.. code-block:: cpp
+
+ #include <pbbam/BamWriter.h>
+
+.. doxygenclass:: PacBio::BAM::BamWriter
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+BarcodeLookupData
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::BarcodeLookupData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+BarcodeQuery
+============
+
+.. code-block:: cpp
+
+ #include <pbbam/BarcodeQuery.h>
+
+.. doxygenclass:: PacBio::BAM::BarcodeQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+BarcodeSet
+==========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::BarcodeSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+BasicLookupData
+===============
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::BasicLookupData
+ :members:
+ :protected-members:
+ :undoc-members:
--- /dev/null
+Cigar
+=====
+
+.. code-block:: cpp
+
+ #include <pbbam/Cigar.h>
+
+.. doxygenclass:: PacBio::BAM::Cigar
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+CigarOperation
+==============
+
+.. code-block:: cpp
+
+ #include <pbbam/CigarOperation.h>
+
+.. doxygenenum:: PacBio::BAM::CigarOperationType
+
+.. doxygenclass:: PacBio::BAM::CigarOperation
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+Compare
+=======
+
+.. code-block:: cpp
+
+ #include <pbbam/Compare.h>
+
+.. doxygenfile:: Compare.h
\ No newline at end of file
--- /dev/null
+Config
+======
+
+.. code-block:: cpp
+
+ #include <pbbam/Config.h>
+
+.. doxygenfile:: Config.h
\ No newline at end of file
--- /dev/null
+ConsensusAlignmentSet
+=====================
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ConsensusAlignmentSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+ConsensusReadSet
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ConsensusReadSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+ContigSet
+=========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ContigSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+DataSet
+=======
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSet.h>
+
+.. doxygenclass:: PacBio::BAM::DataSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+DataSetBase
+===========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::DataSetBase
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+DataSetMetadata
+===============
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::DataSetMetadata
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+EntireFileQuery
+===============
+
+.. code-block:: cpp
+
+ #include <pbbam/EntireFileQuery.h>
+
+.. doxygenclass:: PacBio::BAM::EntireFileQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+ExtensionElement
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ExtensionElement
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+Extensions
+==========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::Extensions
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+ExternalResource
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ExternalResource
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+ExternalResources
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ExternalResources
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+FileIndex
+=========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::FileIndex
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+FileIndices
+===========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::FileIndices
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+Filter
+======
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::Filter
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+Filters
+=======
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::Filters
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+Frames
+======
+
+.. code-block:: cpp
+
+ #include <pbbam/Frames.h>
+
+.. doxygenclass:: PacBio::BAM::Frames
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+GenomicInterval
+===============
+
+.. code-block:: cpp
+
+ #include <pbbam/GenomicInterval.h>
+
+.. doxygenclass:: PacBio::BAM::GenomicInterval
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+GenomicIntervalCompositeBamReader
+=================================
+
+.. code-block:: cpp
+
+ #include <pbbam/CompositeBamReader.h>
+
+.. doxygenclass:: PacBio::BAM::GenomicIntervalCompositeBamReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+GenomicIntervalQuery
+====================
+
+.. code-block:: cpp
+
+ #include <pbbam/GenomicIntervalQuery.h>
+
+.. doxygenclass:: PacBio::BAM::GenomicIntervalQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+HdfSubreadSet
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::HdfSubreadSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+IndexResultBlock
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiBasicTypes.h>
+
+.. doxygenstruct:: PacBio::BAM::IndexResultBlock
+ :members:
+ :protected-members:
+ :undoc-members:
+
+.. doxygentypedef:: PacBio::BAM::IndexResultBlocks
+
+.. doxygentypedef:: PacBio::BAM::IndexList
+
+.. doxygentypedef:: PacBio::BAM::IndexRange
\ No newline at end of file
--- /dev/null
+IndexedFastaReader
+==================
+
+.. code-block:: cpp
+
+ #include <pbbam/IndexedFastaReader.h>
+
+.. doxygenclass:: PacBio::BAM::IndexedFastaReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+Interval
+========
+
+.. code-block:: cpp
+
+ #include <pbbam/Interval.h>
+
+.. doxygenclass:: PacBio::BAM::Interval
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+InvalidSequencingChemistryException
+===================================
+
+.. code-block:: cpp
+
+ #include <pbbam/exception/InvalidSequencingChemistryException.h>
+
+.. doxygenclass:: PacBio::BAM::InvalidSequencingChemistryException
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+LocalContextFlags
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/LocalContextFlags.h>
+
+.. doxygenenum:: PacBio::BAM::LocalContextFlags
--- /dev/null
+MappedLookupData
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::MappedLookupData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+NamespaceInfo
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetXsd.h>
+
+.. doxygenclass:: PacBio::BAM::NamespaceInfo
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+NamespaceRegistry
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetXsd.h>
+
+.. doxygenclass:: PacBio::BAM::NamespaceRegistry
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+OrderedLookup
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::OrderedLookup
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+Orientation
+===========
+
+.. code-block:: cpp
+
+ #include <pbbam/Orientation.h>
+
+.. doxygenenum:: PacBio::BAM::Orientation
--- /dev/null
+ParentTool
+==========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ParentTool
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+PbiBuilder
+==========
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiBuilder.h>
+
+.. doxygenclass:: PacBio::BAM::PbiBuilder
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+PbiFile
+=======
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiFile.h>
+
+.. doxygenenum:: PacBio::BAM::PbiFile::Section
+
+.. doxygentypedef:: PacBio::BAM::PbiFile::Sections
+
+.. doxygenenum:: PacBio::BAM::PbiFile::VersionEnum
+
+.. doxygenfunction:: PacBio::BAM::PbiFile::CreateFrom
--- /dev/null
+PbiFilter
+=========
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiFilter.h>
+
+.. doxygenclass:: PacBio::BAM::PbiFilter
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+PbiFilterCompositeBamReader
+===========================
+
+.. code-block:: cpp
+
+ #include <pbbam/CompositeBamReader.h>
+
+.. doxygenclass:: PacBio::BAM::PbiFilterCompositeBamReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+PbiFilterQuery
+==============
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiFilterQuery.h>
+
+.. doxygenclass:: PacBio::BAM::PbiFilterQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+PbiFilterTypes
+==============
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiFilterTypes.h>
+
+.. doxygenfile:: PbiFilterTypes.h
\ No newline at end of file
--- /dev/null
+PbiIndex
+========
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiIndex.h>
+
+.. doxygenclass:: PacBio::BAM::PbiIndex
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+PbiIndexedBamReader
+===================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiIndexedBamReader.h>
+
+.. doxygenclass:: PacBio::BAM::PbiIndexedBamReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+PbiRawBarcodeData
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiRawBarcodeData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+PbiRawBasicData
+===============
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiRawBasicData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+PbiRawData
+==========
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiRawData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+PbiRawMappedData
+================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiRawMappedData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+PbiRawReferenceData
+===================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiRawReferenceData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+PbiReferenceEntry
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiRawData.h>
+
+.. doxygenclass:: PacBio::BAM::PbiReferenceEntry
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+Position
+========
+
+.. code-block:: cpp
+
+ #include <pbbam/Position.h>
+
+.. doxygentypedef:: PacBio::BAM::Position
+
+.. doxygenvariable:: PacBio::BAM::UnmappedPosition
\ No newline at end of file
--- /dev/null
+ProgramInfo
+===========
+
+.. code-block:: cpp
+
+ #include <pbbam/ProgramInfo.h>
+
+.. doxygenclass:: PacBio::BAM::ProgramInfo
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+QNameQuery
+==========
+
+.. code-block:: cpp
+
+ #include <pbbam/QNameQuery.h>
+
+.. doxygenclass:: PacBio::BAM::QNameQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+QualityValue
+============
+
+.. code-block:: cpp
+
+ #include <pbbam/QualityValue.h>
+
+.. doxygenclass:: PacBio::BAM::QualityValue
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+QualityValues
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/QualityValues.h>
+
+.. doxygenclass:: PacBio::BAM::QualityValues
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+ReadAccuracyQuery
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/ReadAccuracyQuery.h>
+
+.. doxygenclass:: PacBio::BAM::ReadAccuracyQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+ReadGroupInfo
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/ReadGroupInfo.h>
+
+.. doxygenenum:: PacBio::BAM::BaseFeature
+
+.. doxygenenum:: PacBio::BAM::FrameCodec
+
+.. doxygenenum:: PacBio::BAM::BarcodeModeType
+
+.. doxygenenum:: PacBio::BAM::BarcodeQualityType
+
+.. doxygenclass:: PacBio::BAM::ReadGroupInfo
+ :members:
+ :protected-members:
+ :undoc-members:
+
+.. doxygenfunction:: PacBio::BAM::MakeReadGroupId
\ No newline at end of file
--- /dev/null
+ReferenceLookupData
+===================
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::ReferenceLookupData
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+ReferenceSet
+============
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::ReferenceSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+SamTagCodec
+===========
+
+.. code-block:: cpp
+
+ #include <pbbam/SamTagCodec.h>
+
+.. doxygenclass:: PacBio::BAM::SamTagCodec
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+SequenceInfo
+============
+
+.. code-block:: cpp
+
+ #include <pbbam/SequenceInfo.h>
+
+.. doxygenclass:: PacBio::BAM::SequenceInfo
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+SequentialCompositeBamReader
+============================
+
+.. code-block:: cpp
+
+ #include <pbbam/CompositeBamReader.h>
+
+.. doxygenclass:: PacBio::BAM::SequentialCompositeBamReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+Strand
+======
+
+.. code-block:: cpp
+
+ #include <pbbam/Strand.h>
+
+.. doxygenenum:: PacBio::BAM::Strand
--- /dev/null
+SubDataSets
+===========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::SubDataSets
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+SubreadLengthQuery
+==================
+
+.. code-block:: cpp
+
+ #include <pbbam/SubreadLengthQuery.h>
+
+.. doxygenclass:: PacBio::BAM::SubreadLengthQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+SubreadSet
+==========
+
+.. code-block:: cpp
+
+ #include <pbbam/DataSetTypes.h>
+
+.. doxygenclass:: PacBio::BAM::SubreadSet
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+Tag
+===
+
+.. code-block:: cpp
+
+ #include <pbbam/Tag.h>
+
+.. doxygenenum:: PacBio::BAM::TagDataType
+
+.. doxygenenum:: PacBio::BAM::TagModifier
+
+.. doxygenclass:: PacBio::BAM::Tag
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+TagCollection
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/TagCollection.h>
+
+.. doxygenclass:: PacBio::BAM::TagCollection
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+UnorderedLookup
+===============
+
+.. code-block:: cpp
+
+ #include <pbbam/PbiLookupData.h>
+
+.. doxygenclass:: PacBio::BAM::UnorderedLookup
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+VirtualPolymeraseBamRecord
+==========================
+
+.. code-block:: cpp
+
+ #include <pbbam/virtual/VirtualPolymeraseBamRecord.h>
+
+.. doxygenclass:: PacBio::BAM::VirtualPolymeraseBamRecord
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+VirtualPolymeraseCompositeReader
+================================
+
+.. code-block:: cpp
+
+ #include <pbbam/virtual/VirtualPolymeraseCompositeReader.h>
+
+.. doxygenclass:: PacBio::BAM::VirtualPolymeraseCompositeReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+VirtualPolymeraseReader
+=======================
+
+.. code-block:: cpp
+
+ #include <pbbam/virtual/VirtualPolymeraseReader.h>
+
+.. doxygenclass:: PacBio::BAM::VirtualPolymeraseReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+VirtualRegion
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/virtual/VirtualRegion.h>
+
+.. doxygenclass:: PacBio::BAM::VirtualRegion
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+VirtualRegionType
+=================
+
+.. code-block:: cpp
+
+ #include <pbbam/virtual/VirtualRegionType.h>
+
+.. doxygenenum:: PacBio::BAM::VirtualRegionType
--- /dev/null
+VirtualRegionTypeMap
+====================
+
+.. code-block:: cpp
+
+ #include <pbbam/virtual/VirtualRegionTypeMap.h>
+
+.. doxygenclass:: PacBio::BAM::VirtualRegionTypeMap
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+ZmwGroupQuery
+=============
+
+.. code-block:: cpp
+
+ #include <pbbam/ZmwGroupQuery.h>
+
+.. doxygenclass:: PacBio::BAM::ZmwGroupQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+ZmwQuery
+========
+
+.. code-block:: cpp
+
+ #include <pbbam/ZmwQuery.h>
+
+.. doxygenclass:: PacBio::BAM::ZmwQuery
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+ZmwWhitelistVirtualReader
+=========================
+
+.. code-block:: cpp
+
+ #include <pbbam/virtual/ZmwWhitelistVirtualReader.h>
+
+.. doxygenclass:: PacBio::BAM::ZmwWhitelistVirtualReader
+ :members:
+ :protected-members:
+ :undoc-members:
\ No newline at end of file
--- /dev/null
+.. _api_reference:
+
+C++ API Reference
+=================
+
+Watch this space for more recipes & how-tos.
+
+.. toctree::
+ :maxdepth: 1
+ :glob:
+
+ api/*
--- /dev/null
+.. _command_line:
+
+Command Line Utilities
+======================
+
+In addition to the main library and wrappers, pbbam also provides a few basic
+utilities for working with PacBio BAM files and their indices (".pbi" files).
+
+.. toctree::
+ :maxdepth: 1
+
+ tools/bam2sam
+ tools/pbindex
+ tools/pbindexdump
+ tools/pbmerge
--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# pbbam documentation build configuration file, created by
+# sphinx-quickstart on Fri Dec 4 10:08:52 2015.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys
+import os
+import shlex
+import re
+import subprocess
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.insert(0, os.path.abspath('.'))
+
+# get RTD to run doxygen first, per http://breathe.readthedocs.org/en/latest/readthedocs.html
+# but... we generate our actual Doxyfile via CMake in a normal build,
+# so we need to create one here, subbing actual values
+read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
+if read_the_docs_build:
+
+ # fetch directory info
+ this_dir = os.path.abspath(os.getcwd())
+ docs_dir = os.path.abspath(os.path.join(this_dir, '..'))
+ root_dir = os.path.abspath(os.path.join(docs_dir, '..'))
+ include_dir = os.path.abspath(os.path.join(root_dir, 'include'))
+
+ # get project version
+ version = ''
+ with open(os.path.abspath(os.path.join(root_dir, 'CMakeLists.txt')), 'r') as cmakeFile:
+ for line in cmakeFile:
+ if line.startswith('project'):
+ version = re.search(r'VERSION\s*([\d.]+)', line).group(1)
+ break
+
+ # read Doxyfile.in, replace markers with real values, and write Doxyfile
+ inDoxyfile = open(os.path.abspath(os.path.join(docs_dir, 'Doxyfile.in')), 'r')
+ configIn = inDoxyfile.read()
+ configOut = re.sub('@PacBioBAM_NAME@', 'pbbam', \
+ re.sub('@PacBioBAM_VERSION@', version, \
+ re.sub('@PacBioBAM_DocsDir@', docs_dir, \
+ re.sub('@PacBioBAM_IncludeDir@', include_dir, configIn))))
+ outDoxyfile = open(os.path.abspath(os.path.join(docs_dir, 'Doxyfile')), 'w')
+ #print(configOut, outDoxyfile)
+ print >>outDoxyfile, configOut
+ outDoxyfile.close()
+
+ # now run Doxygen
+ subprocess.call('cd ..; doxygen', shell=True)
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = ['breathe']
+#extensions = [
+# 'sphinx.ext.autodoc',
+ # 'sphinx.ext.coverage',
+ # 'breathe',
+#]
+
+# Setup Breathe extension variables
+breathe_projects = { 'pbbam' : os.path.join(os.getcwd(), '..', 'xml') + os.path.sep }
+breathe_default_project = 'pbbam'
+breathe_default_members = ('members', 'undoc-members')
+breathe_implementation_filename_extensions = [ '.cpp', '.inl' ]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'pbbam'
+copyright = u'2015, Derek Barnett'
+author = u'Derek Barnett'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '0.7.4'
+# The full version, including alpha/beta/rc tags.
+release = '0.7.4'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = []
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+#keep_warnings = False
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+html_theme = 'pacbio-theme'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+html_theme_path = ['.']
+
+# The name for this set of Sphinx documents. If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar. Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+#html_extra_path = []
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_domain_indices = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Language to be used for generating the HTML full-text search index.
+# Sphinx supports the following languages:
+# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
+# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
+#html_search_language = 'en'
+
+# A dictionary with options for the search language support, empty by default.
+# Now only 'ja' uses this config value
+#html_search_options = {'type': 'default'}
+
+# The name of a javascript file (relative to the configuration directory) that
+# implements a search results scorer. If empty, the default will be used.
+#html_search_scorer = 'scorer.js'
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'pbbamdoc'
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+# The paper size ('letterpaper' or 'a4paper').
+#'papersize': 'letterpaper',
+
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
+
+# Latex figure (float) alignment
+#'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+# author, documentclass [howto, manual, or own class]).
+latex_documents = [
+ (master_doc, 'pbbam.tex', u'pbbam Documentation',
+ u'Derek Barnett', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ (master_doc, 'pbbam', u'pbbam Documentation',
+ [author], 1)
+]
+
+# If true, show URL addresses after external links.
+#man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ (master_doc, 'pbbam', u'pbbam Documentation',
+ author, 'pbbam', 'One line description of project.',
+ 'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#texinfo_no_detailmenu = False
--- /dev/null
+
+.. _getting_started:
+
+Getting Started
+===============
+
+.. _getting_started-requirements:
+
+Requirements
+------------
+
+These components will almost certainly already be on your system.
+
+* `gcc`_ (4.8+) OR `clang`_ (v3.1+)
+* pthreads
+* zlib
+
+Double-check your compiler version, to be sure it is compatible.
+
+.. code-block:: console
+
+ $ g++ -v
+ $ clang -v
+
+Additional requirements:
+
+* `Boost`_ (1.55+)
+* `CMake`_ (3.0+)
+* `Google Test`_
+* `htslib`_ (PacBio fork)
+
+For additional languages:
+
+* `SWIG`_ (3.0.5+)
+
+For building API documentation locally:
+
+* `Doxygen`_
+
+For maximal convenience, install htslib and Google Test in the same parent directory where you plan to install pbbam.
+
+.. _Boost: http://www.boost.org/
+.. _clang: http://clang.llvm.org/
+.. _CMake: https://cmake.org/
+.. _Doxygen: http://www.stack.nl/~dimitri/doxygen/
+.. _gcc: https://gcc.gnu.org/
+.. _Google Test: https://github.com/google/googletest
+.. _htslib: https://github.com/PacificBiosciences/htslib.git
+.. _SWIG: http://www.swig.org/
+
+.. _getting_started-build:
+
+Clone & Build
+-------------
+
+.. note::
+
+ The following steps are for building the C++ library and command-line utilities.
+ If you are integrating pbbam into a C#, Python, or R project, take a look at the
+ instructions for :ref:`additional languages <swig_bindings>`.
+
+The basic steps for obtaining pbbam and building it from source are as follows:
+
+.. code-block:: console
+
+ $ git clone https://github.com/PacificBiosciences/pbbam.git
+ $ cd pbbam
+ $ mkdir build
+ $ cd build
+ $ cmake ..
+ $ make -j 4 # compiles using 4 threads
+
+Output:
+
+ * Library : <pbbam_root>/lib
+ * Headers : <pbbam_root>/include
+ * Utilities : <pbbam_root>/bin
+
+You may need to set a few options on the cmake command, to point to dependencies' install locations.
+Common installation-related options include:
+
+ * HTSLIB_ROOTDIR
+ * GTEST_SRC_DIR
+
+Add these using the '-D' argument, like this:
+
+.. code-block:: console
+
+ $ cmake .. -DHTSLIB_ROOTDIR="path/to/htslib"
+
+To run the test suite, run:
+
+.. code-block:: console
+
+ $ make test
+
+To build a local copy of the (Doxygen-style) API documentation, run:
+
+.. code-block:: console
+
+ $ make doc
+
+And then open <pbbam_root>/docs/html/index.html in your favorite browser.
+
+.. _getting_started-integrate:
+
+Integrate
+---------
+
+CMake-based projects
+````````````````````
+
+For CMake-based projects that will "ship with" or otherwise live alongside pbbam, you can
+use the approach described here.
+
+Before defining your library or executable, add the following:
+
+.. code-block:: cmake
+
+ add_subdirectory(<path/to/pbbam> external/build/pbbam)
+
+When it's time to run "make" this will ensure that pbbam will be built, inside your own project's
+build directory. After this point in the CMakeLists.txt file(s), a few variables will be available
+that can be used to setup your include paths and library linking targets:
+
+.. code-block:: cmake
+
+ include_directories(
+ ${PacBioBAM_INCLUDE_DIRS}
+ # other includes that your project needs
+ )
+
+ add_executable(foo)
+
+ target_link_libraries(foo
+ ${PacBioBAM_LIBRARIES}
+ # other libs that your project needs
+ )
+
+Non-CMake projects
+``````````````````
+
+If you're using something other than CMake for your project's build system, then you need to point
+it to pbbam's include directory & library, as well as those of its dependencies (primarily htslib).
--- /dev/null
+.. pbbam documentation master file, created by
+ sphinx-quickstart on Fri Dec 4 10:08:52 2015.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+.. _home:
+
+pbbam documentation
+===================
+
+As of the 3.0 release of SMRTanalysis, PacBio is embracing the industry standard BAM
+format for (both aligned and unaligned) basecall data files. We have also formulated
+a BAM companion file format (bam.pbi) enabling fast access to a richer set of per-read
+information as well as compatibility for software built around the legacy cmp.h5 format.
+
+The **pbbam** software package provides components to create, query, & edit PacBio BAM
+files and associated indices. These components include a core C++ library, bindings for
+additional languages, and command-line utilities.
+
+.. toctree::
+ :maxdepth: 1
+
+ getting_started
+ api_reference
+ swig_bindings
+ commandline_utilities
+
+
+Search:
+
+* :ref:`genindex`
+* :ref:`search`
+
--- /dev/null
+/**
+ * Sphinx stylesheet -- default theme
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+@import url("basic.css");
+
+/* -- page layout ----------------------------------------------------------- */
+
+body {
+ font-family: Arial, sans-serif;
+ font-size: 100%;
+ background-color: #555;
+ color: #555;
+ margin: 0;
+ padding: 0;
+ min-width: 500px;
+ max-width: 956px;
+ margin: 0 auto;
+}
+
+div.documentwrapper {
+ float: left;
+ width: 100%;
+}
+
+div.bodywrapper {
+ margin: 0 0 0 230px;
+}
+
+hr{
+ border: 1px solid #B1B4B6;
+
+}
+
+div.document {
+ background-color: #eee;
+}
+
+div.body {
+ background-color: #ffffff;
+ color: #3E4349;
+ padding: 30px 30px 30px 30px;
+ font-size: 0.8em;
+}
+
+div.footer {
+ color: #555;
+ background-color: #fff;
+ padding: 13px 0;
+ text-align: center;
+ font-size: 75%;
+
+}
+div.footer a {
+ color: #444;
+ text-decoration: underline;
+}
+
+div.related {
+ background: #fff url(headerGradient.jpg);
+ line-height: 80px;
+ color: #fff;
+ font-size: 0.80em;
+ height: 79px;
+ z-index: -1;
+}
+
+div.related ul {
+ background: url(pacbioLogo.png) 10px no-repeat;
+ padding: 0 0 0 200px;
+}
+
+div.related a {
+ color: #E2F3CC;
+}
+
+div.sphinxsidebar {
+ font-size: 0.75em;
+ line-height: 1.5em;
+}
+
+div.sphinxsidebarwrapper{
+ padding: 20px 0;
+}
+
+div.sphinxsidebar h3,
+div.sphinxsidebar h4 {
+ font-family: Arial, sans-serif;
+ color: #222;
+ font-size: 1.2em;
+ font-weight: bold;
+ margin: 0;
+ padding: 5px 10px 0 10px;
+}
+
+div.sphinxsidebar h4{
+ font-size: 1.1em;
+}
+
+div.sphinxsidebar h3 a {
+ color: #444;
+}
+
+
+div.sphinxsidebar p {
+ color: #888;
+ padding: 0px 20px;
+ margin-top: 5px;
+}
+
+div.sphinxsidebar p.topless {
+}
+
+div.sphinxsidebar ul {
+ margin: 5px 20px 10px 20px;
+ padding: 0;
+ color: #000;
+}
+
+div.sphinxsidebar a {
+ color: #444;
+}
+
+div.sphinxsidebar input {
+ border: 1px solid #ccc;
+ font-family: sans-serif;
+ font-size: 1em;
+}
+
+div.sphinxsidebar input[type=text]{
+ margin-left: 20px;
+}
+
+/* -- body styles ----------------------------------------------------------- */
+
+a {
+ color: #005B81;
+ text-decoration: none;
+}
+
+a:hover {
+ color: #E32E00;
+ text-decoration: underline;
+}
+
+div.body h1,
+div.body h2,
+div.body h3,
+div.body h4,
+div.body h5,
+div.body h6 {
+ font-family: Arial, sans-serif;
+ font-weight: bold;
+ color: #264868;
+ margin: 30px 0px 10px 0px;
+ padding: 5px 0 5px 0px;
+}
+
+div.body h1 { border-top: 20px solid white; margin-top: 0; font-size: 180%; font-weight: normal; }
+div.body h2 { font-size: 125%; }
+div.body h3 { font-size: 110%; }
+div.body h4 { font-size: 100%; }
+div.body h5 { font-size: 100%; }
+div.body h6 { font-size: 100%; }
+
+a.headerlink {
+ color: #c60f0f;
+ font-size: 0.8em;
+ padding: 0 4px 0 4px;
+ text-decoration: none;
+}
+
+a.headerlink:hover {
+ background-color: #c60f0f;
+ color: white;
+}
+
+div.body p, div.body dd, div.body li {
+ line-height: 1.5em;
+ font-size: 1em;
+}
+
+div.admonition p.admonition-title + p {
+ display: inline;
+}
+
+div.highlight{
+ background-color: white;
+}
+
+div.note {
+ background-color: #eee;
+ border: 1px solid #ccc;
+}
+
+div.seealso {
+ background-color: #ffc;
+ border: 1px solid #ff6;
+}
+
+div.topic {
+ background-color: #eee;
+}
+
+div.warning {
+ background-color: #ffe4e4;
+ border: 1px solid #f66;
+}
+
+p.admonition-title {
+ display: inline;
+}
+
+p.admonition-title:after {
+ content: ":";
+}
+
+/* Preformatted/code blocks: white panel with a thin border and a subtle drop shadow. */
+pre {
+    padding: 10px;
+    background-color: White;
+    color: #222;
+    line-height: 1.2em;
+    border: 1px solid #C6C9CB;
+    font-size: 1.2em;
+    margin: 1.5em 0 1.5em 0;
+    -webkit-box-shadow: 1px 1px 1px #d8d8d8;
+    -moz-box-shadow: 1px 1px 1px #d8d8d8;
+    /* Unprefixed property last, so browsers without the vendor prefixes still draw the shadow. */
+    box-shadow: 1px 1px 1px #d8d8d8;
+}
+
+tt {
+ background-color: #ecf0f3;
+ color: #222;
+ padding: 1px 2px;
+ font-size: 1.2em;
+ font-family: monospace;
+}
+
--- /dev/null
+.c { color: #999988; font-style: italic } /* Comment */
+.k { font-weight: bold } /* Keyword */
+.o { font-weight: bold } /* Operator */
+.cm { color: #999988; font-style: italic } /* Comment.Multiline */
+.cp { color: #999999; font-weight: bold } /* Comment.preproc */
+.c1 { color: #999988; font-style: italic } /* Comment.Single */
+.gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
+.ge { font-style: italic } /* Generic.Emph */
+.gr { color: #aa0000 } /* Generic.Error */
+.gh { color: #999999 } /* Generic.Heading */
+.gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
+.go { color: #111 } /* Generic.Output */
+.gp { color: #555555 } /* Generic.Prompt */
+.gs { font-weight: bold } /* Generic.Strong */
+.gu { color: #aaaaaa } /* Generic.Subheading */
+.gt { color: #aa0000 } /* Generic.Traceback */
+.kc { font-weight: bold } /* Keyword.Constant */
+.kd { font-weight: bold } /* Keyword.Declaration */
+.kp { font-weight: bold } /* Keyword.Pseudo */
+.kr { font-weight: bold } /* Keyword.Reserved */
+.kt { color: #445588; font-weight: bold } /* Keyword.Type */
+.m { color: #009999 } /* Literal.Number */
+.s { color: #bb8844 } /* Literal.String */
+.na { color: #008080 } /* Name.Attribute */
+.nb { color: #999999 } /* Name.Builtin */
+.nc { color: #445588; font-weight: bold } /* Name.Class */
+.no { color: #ff99ff } /* Name.Constant */
+.ni { color: #800080 } /* Name.Entity */
+.ne { color: #990000; font-weight: bold } /* Name.Exception */
+.nf { color: #990000; font-weight: bold } /* Name.Function */
+.nn { color: #555555 } /* Name.Namespace */
+.nt { color: #000080 } /* Name.Tag */
+.nv { color: purple } /* Name.Variable */
+.ow { font-weight: bold } /* Operator.Word */
+.mf { color: #009999 } /* Literal.Number.Float */
+.mh { color: #009999 } /* Literal.Number.Hex */
+.mi { color: #009999 } /* Literal.Number.Integer */
+.mo { color: #009999 } /* Literal.Number.Oct */
+.sb { color: #bb8844 } /* Literal.String.Backtick */
+.sc { color: #bb8844 } /* Literal.String.Char */
+.sd { color: #bb8844 } /* Literal.String.Doc */
+.s2 { color: #bb8844 } /* Literal.String.Double */
+.se { color: #bb8844 } /* Literal.String.Escape */
+.sh { color: #bb8844 } /* Literal.String.Heredoc */
+.si { color: #bb8844 } /* Literal.String.Interpol */
+.sx { color: #bb8844 } /* Literal.String.Other */
+.sr { color: #808000 } /* Literal.String.Regex */
+.s1 { color: #bb8844 } /* Literal.String.Single */
+.ss { color: #bb8844 } /* Literal.String.Symbol */
+.bp { color: #999999 } /* Name.Builtin.Pseudo */
+.vc { color: #ff99ff } /* Name.Variable.Class */
+.vg { color: #ff99ff } /* Name.Variable.Global */
+.vi { color: #ff99ff } /* Name.Variable.Instance */
+.il { color: #009999 } /* Literal.Number.Integer.Long */
+
--- /dev/null
+[theme]
+inherit = default
+stylesheet = pacbio.css
+pygments_style = tango
--- /dev/null
+.. _swig_bindings:
+
+Additional Languages
+====================
+
+pbbam uses SWIG to generate bindings for other languages. Currently this includes support for C#, Python, and R.
+
+These bindings are disabled by default. See the entry below for your target language to configure pbbam & integrate
+the bindings into your project.
+
+.. _swig_bindings-csharp:
+
+C#
+------
+
+Building
+````````
+
+To build the support for C#, you need to tell CMake to enable it before building:
+
+.. code-block:: console
+
+    $ cmake .. -DPacBioBAM_wrap_csharp=ON
+    $ make
+
+The 'make' step will build relevant libraries/wrappers, and then run a simple program using them,
+as a quick sanity-check.
+
+After building, the libraries and wrappers can be found under the pbbam/lib/csharp directory.
+
+API Example
+```````````
+
+.. code-block:: c#
+
+ using PacBio.BAM;
+
+ namespace TestStuff
+ {
+ public class TestPbbam
+ {
+ public static void TestZmwQuery()
+ {
+ var d = new DataSet("foo.bam");
+ var q = new ZmwQuery(new IntList {1, 2, 3}, d);
+ var q2 = new ZmwQuery(new IntList { 14743 }, d);
+ if (0 != q.Count() || 4 != q2.Count())
+ {
+ throw new Exception("ZmwQuery not working");
+ }
+ Console.WriteLine("TestZmwQuery - OK!");
+ }
+ }
+ }
+
+.. _swig_bindings-python:
+
+Python
+------
+
+Building
+````````
+
+To build the support for Python, you need to tell CMake to enable it:
+
+.. code-block:: console
+
+    $ cmake .. -DPacBioBAM_wrap_python=ON
+    $ make
+
+The 'make' step will build relevant libraries/wrappers, and then run a simple program using them,
+as a quick sanity-check.
+
+After building, the libraries and wrappers can be found in the pbbam/lib/python directory.
+'make test' will also include some Python-side unit tests.
+
+To use the PacBioBam module, you can set your PYTHONPATH before invoking your script:
+
+.. code-block:: console
+
+ $ PYTHONPATH="path/to/pbbam/lib/python" python myScript.py
+
+Or otherwise configure your environment to find the PacBioBam module.
+
+API Example
+```````````
+
+.. code-block:: python
+
+    import PacBioBam
+
+    try:
+        file = PacBioBam.BamFile('foo.bam')
+        writer = PacBioBam.BamWriter('new.bam', file.Header())
+        dataset = PacBioBam.DataSet(file)
+        entireFile = PacBioBam.EntireFileQuery(dataset)
+        for record in PacBioBam.Iterate(entireFile):
+            writer.Write(record)
+    except RuntimeError:
+        # found error
+        pass
+
+Python-Specific Notes
+`````````````````````
+
+Iteration
+.........
+
+Iteration over dataset queries in Python will likely need to use the PacBioBam.Iterate() method. Thus
+file iteration loops will look something like the following:
+
+.. code-block:: python
+
+ entireFile = PacBioBam.EntireFileQuery("input.bam")
+ for record in PacBioBam.Iterate(entireFile):
+ foo.bar(record)
+
+Exception Handling
+..................
+
+Exceptions are used widely by the C++ library. To handle them from Python, you can use try blocks, looking for
+any RuntimeError:
+
+.. code-block:: python
+
+ try:
+ file = PacBioBam.BamFile("does_not_exist.bam")
+ except RuntimeError:
+ print("caught expected error")
+
+.. _swig_bindings-r:
+
+R
+------
+
+Building
+````````
+
+To build the support for R, you need to tell CMake to enable it:
+
+.. code-block:: console
+
+    $ cmake .. -DPacBioBAM_wrap_r=ON
+    $ make
+
+The 'make' step will build relevant libraries/wrappers, and then run a simple program using them,
+as a quick sanity-check.
+
+After building, the libraries and wrappers can be found in the pbbam/lib/R directory.
+'make test' will also include some R-side unit tests.
+
+To use the PacBioBam module in your script, nothing should be needed up front - simply invoke 'R' as normal.
+You'll do the dynamic load of the R module near the beginning of your script:
+
+.. code-block:: r
+
+ # load pbbam R library
+ lib_path <- "path/to/pbbam/lib/R"
+ pbbam_libname <- paste(lib_path, "PacBioBam", sep="/")
+ pbbam_wrapper <- paste(lib_path, "PacBioBam.R", sep="/")
+ dyn.load(paste(pbbam_libname, .Platform$dynlib.ext, sep=""))
+ source(pbbam_wrapper)
+ cacheMetaData(1)
+
+
+API Example
+```````````
+
+.. code-block:: r
+
+    # load pbbam R library
+    lib_path <- "path/to/pbbam/lib/R"
+    pbbam_libname <- paste(lib_path, "PacBioBam", sep="/")
+    pbbam_wrapper <- paste(lib_path, "PacBioBam.R", sep="/")
+    dyn.load(paste(pbbam_libname, .Platform$dynlib.ext, sep=""))
+    source(pbbam_wrapper)
+    cacheMetaData(1)
+
+    # sample method
+    copyFileAndFetchRecordNames <-function(inputFn, outputFn) {
+
+        result <- tryCatch(
+            {
+                file <- BamFile(inputFn)
+                writer <- BamWriter(outputFn, file$Header())
+                ds <- DataSet(file)
+
+                entireFile <- EntireFileQuery(ds)
+                iter <- entireFile$begin()
+                end <- entireFile$end()
+
+                # collected record names; must exist before the first append
+                names_in <- list()
+
+                while ( iter$'__ne__'(end) ) {
+                    record <- iter$value()
+
+                    names_in <- c(names_in, record$FullName())
+                    writer$Write(record)
+                    iter$incr()
+                }
+                writer$TryFlush()
+                return(names_in)
+            },
+            error = function(e) {
+                # handle error
+                return(list())
+            })
+        return(result)
+    }
+
+R-Specific Notes
+````````````````
+
+Iteration
+.........
+
+To compare iterators, you'll need to explicitly use the '__eq__' or '__ne__' methods. Thus, iterating over
+a data query will look something like this:
+
+.. code-block:: r
+
+    iter <- query$begin()
+    end <- query$end()
+    while ( iter$'__ne__'(end) ) {
+        record <- iter$value()
+
+        # do stuff with record
+
+        iter$incr()    # advance to the next record; without this the loop never ends
+    }
+
+operator[]
+..........
+
+In C++, operator[] can be used in some classes to directly access elements in a sequence, e.g. Cigar string
+
+.. code-block:: cpp
+
+ CigarOperation op = cigar[0];
+
+For the R wrapper, if you want to do the same sort of thing, you'll need to use the '__getitem__' method.
+Please note that these are **0-based** indices, not 1-based as in much of R.
+
+.. code-block:: r
+
+ op <- cigar$'__getitem__'(0)
+
+Exception Handling
+..................
+
+Exceptions are used widely by the C++ library. To handle them from R, you can use the 'tryCatch' block, listening for
+'error' type exceptions.
+
+.. code-block:: r
+
+    result <- tryCatch(
+        {
+            f <- BamFile("does_not_exist.bam") # this statement will throw
+        },
+        error = function(e) {
+            print(paste("caught expected error: ",e))
+        })
--- /dev/null
+.. _bam2sam:
+
+bam2sam
+=======
+
+::
+
+ Usage: bam2sam [options] [input]
+
+ bam2sam converts a BAM file to SAM. It is essentially a stripped-down 'samtools
+ view', mostly useful for testing/debugging without requiring samtools. Input BAM
+ file is read from a file or stdin, and SAM output is written to stdout.
+
+ Options:
+ -h, --help show this help message and exit
+ --version show program's version number and exit
+
+ Options:
+ input Input BAM file. If not provided, stdin will be used as input.
+ --no-header Omit header from output.
+ --header-only Print only the header (no records).
--- /dev/null
+.. _pbindex:
+
+pbindex
+=======
+
+::
+
+ Usage: pbindex <input>
+
+ pbindex creates an index file that enables random-access to PacBio-specific data
+ in BAM files. Generated index filename will be the same as input BAM plus .pbi suffix.
+
+ Options:
+ -h, --help show this help message and exit
+ --version show program's version number and exit
+
+ Input/Output:
+ input Input BAM file
--- /dev/null
+.. _pbindexdump:
+
+pbindexdump
+===========
+
+::
+
+ Usage: pbindexdump [options] [input]
+
+ pbindexdump prints a human-readable view of PBI data to stdout.
+
+ Options:
+ -h, --help show this help message and exit
+ --version show program's version number and exit
+
+ Input/Output:
+ input Input PBI file. If not provided, stdin will be used as input.
+ --format=STRING Output format, one of:
+ json, cpp
+
+ json: pretty-printed JSON [default]
+
+ cpp: copy/paste-able C++ code that can be used to
+ construct the equivalent PacBio::BAM::PbiRawData object
+
+ JSON Formatting:
+ --json-indent-level=INT
+ JSON indent level [4]
+ --json-raw Prints fields in a manner that more closely reflects the
+ PBI file format - presenting data as per-field columns,
+ not per-record objects.
+
+JSON Output Schemas
+-------------------
+
+Normal JSON:
+
+.. code-block:: JSON
+
+ {
+ "type": "object",
+ "properties": {
+ "fileSections": {
+ "type": "array",
+ "items": { "type": "string" }
+ },
+ "numReads": { "type": "integer" },
+ "reads": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "aEnd": { "type": "integer" },
+ "aStart": { "type": "integer" },
+ "bcForward": { "type": "integer" },
+ "bcQuality": { "type": "integer" },
+ "bcReverse": { "type": "integer" },
+ "contextFlag": { "type": "integer" },
+ "fileOffset": { "type": "integer" },
+ "holeNumber": { "type": "integer" },
+ "mapQuality": { "type": "integer" },
+ "nM": { "type": "integer" },
+ "nMM": { "type": "integer" },
+ "qEnd": { "type": "integer" },
+ "qStart": { "type": "integer" },
+ "readQuality": { "type": "number" },
+ "reverseStrand": { "type": "integer" },
+ "rgId": { "type": "integer" },
+ "tEnd": { "type": "integer" },
+ "tId": { "type": "integer" },
+ "tStart": { "type": "integer" }
+ },
+ "required": [
+ "contextFlag",
+ "fileOffset",
+ "holeNumber",
+ "qEnd",
+ "qStart",
+ "readQuality",
+ "rgId"
+ ]
+ }
+ },
+ "references": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "beginRow": { "type": "integer" },
+ "endRow": { "type": "integer" },
+ "tId": { "type": "integer" }
+ },
+ "required" : [ "beginRow", "endRow","tId" ]
+ }
+ },
+ "version": { "type": "string" }
+ },
+ "required": [
+ "fileSections",
+ "numReads",
+ "reads",
+ "version"
+ ]
+ }
+
+"Raw" JSON:
+
+.. code-block:: JSON
+
+ {
+ "type": "object",
+ "properties": {
+ "barcodeData" : {
+ "type" : "object",
+ "properties": {
+ "bcForward" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "bcQuality" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "bcReverse" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ }
+ }
+ },
+ "basicData" : {
+ "type" : "object",
+ "properties": {
+ "contextFlag" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "fileOffset" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "holeNumber" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "qEnd" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "qStart" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "readQuality" : {
+ "type": "array",
+ "items" : { "type": "number" }
+ },
+ "rgId" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ }
+ }
+ },
+ "fileSections": {
+ "type": "array",
+ "items": { "type": "string" }
+ },
+ "mappedData" : {
+ "type" : "object",
+ "properties": {
+ "aEnd" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "aStart" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "mapQuality" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "nM" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "nMM" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "readQuality" : {
+ "type": "array",
+ "items" : { "type": "number" }
+ },
+ "reverseStrand" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "tEnd" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "tId" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ },
+ "tStart" : {
+ "type": "array",
+ "items" : { "type": "integer" }
+ }
+ }
+ },
+ "numReads": { "type": "integer" },
+ "references": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "beginRow": { "type": "integer" },
+ "endRow": { "type": "integer" },
+ "tId": { "type": "integer" }
+ },
+ "required" : [ "beginRow", "endRow","tId" ]
+ }
+ },
+ "version" : { "type": "string" }
+ },
+ "required": [
+ "fileSections",
+ "numReads",
+ "basicData",
+ "version"
+ ]
+ }
--- /dev/null
+.. _pbmerge:
+
+pbmerge
+=======
+
+::
+
+ Usage: pbmerge [options] [-o <out.bam>] <INPUT>
+
+ pbmerge merges PacBio BAM files. If the input is DataSetXML, any filters will be
+ applied. If no output filename is specified, new BAM will be written to stdout.
+
+ Options:
+ -h, --help show this help message and exit
+ --version show program's version number and exit
+
+ Input/Output:
+ -o output Output BAM filename.
+ --no-pbi Set this option to skip PBI index file creation. PBI
+ creation is automatically skipped if no output filename
+ is provided.
+ INPUT Input may be one of:
+ DataSetXML, list of BAM files, or FOFN
+
+ fofn: pbmerge -o merged.bam bams.fofn
+
+ bams: pbmerge -o merged.bam 1.bam 2.bam 3.bam
+
+ xml: pbmerge -o merged.bam foo.subreadset.xml
+
--- /dev/null
+=================================================================
+**pbbam Software Design & Functional Specification**
+=================================================================
+| *Version 0.1*
+| *Pacific Biosciences Engineering Group*
+| *Jan 29, 2016*
+
+1. Revision History
+===================
+
++-------------+---------------+--------------------+---------------------------+
+| **Date** | **Revision** | **Author(s)** | **Comments** |
++=============+===============+====================+===========================+
+| 01-29-2016 | 0.1 | Derek Barnett | Initial draft created |
+| | | | |
++-------------+---------------+--------------------+---------------------------+
+
+2. Introduction
+===============
+
+2.1. Document Specification Identifier
+--------------------------------------
+
++-----------------------------------+------------------------------------------+
+| **Document Specification Prefix** | **Description** |
++===================================+==========================================+
+| FS\_SA\_PBBAM\_ | Functional spec for pbbam |
++-----------------------------------+------------------------------------------+
+
+2.2. Purpose
+------------
+
+This document is intended to describe the requirements and interface of the pbbam
+library, which provides functionality for creating, querying, and editing PacBio
+BAM files and associated file formats.
+
+2.3. Scope of Document
+----------------------
+
+This document covers the expected usage of the pbbam library, as well as any
+desired or required performance characteristics with respect to quality or speed.
+
+This document does not provide installation instructions or API documentation.
+
+2.4. Glossary of Terms
+----------------------
+
+The table below specifies only terms specific to this document, and skips
+acronyms/terms that are specified in `Pacific Biosciences Software Glossary`_.
+
+.. _Pacific Biosciences Software Glossary: http://smrtanalysis-docs/pb_sw_glossary.html
+
++------------------+-----------------------------------------------------------+
+| **Acronym/Term** | **Description** |
++==================+===========================================================+
+| API | Application Programming Interface - a set of routines, |
+| | protocols, and tools for building software applications. |
+| | In this document, this will consist of one or more  |
+| | cooperating libraries that specify data structures, |
+| | methods, etc. for use within a target programming |
+| | language. |
++------------------+-----------------------------------------------------------+
+| Client | An application that uses the library. |
++------------------+-----------------------------------------------------------+
+| I/O | Input/output of data. |
++------------------+-----------------------------------------------------------+
+
+2.5. References
+---------------
+
++-------------+------------------------------+--------------------------------------+
+| **Ref No.** | **Document Name, Link** | **Description** |
++=============+==============================+======================================+
+| (1) | `BAM format`_ | General SAM/BAM specification |
++-------------+------------------------------+--------------------------------------+
+| (2) | `PacBio BAM`_ | PacBio BAM specification |
++-------------+------------------------------+--------------------------------------+
+| (3) | `PacBio BAM index`_ | PacBio BAM index specification |
++-------------+------------------------------+--------------------------------------+
+| (4) | `DataSet XML`_ | PacBio DataSet XML specification |
++-------------+------------------------------+--------------------------------------+
+| (5) | `Software Style Guide`_ | PacBio coding standards |
++-------------+------------------------------+--------------------------------------+
+| (6) | `SMRT Analysis`_ | General SMRT Analysis infrastructure |
++-------------+------------------------------+--------------------------------------+
+
+.. _BAM format: https://samtools.github.io/hts-specs/SAMv1.pdf
+.. _PacBio BAM: http://pacbiofileformats.readthedocs.org/en/3.0/BAM.html
+.. _PacBio BAM index: http://pacbiofileformats.readthedocs.org/en/3.0/PacBioBamIndex.html
+.. _DataSet XML: https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/DataSet.rst
+.. _Software Style Guide: http://smrtanalysis-docs/_downloads/PBISoftwareStyleGuide.doc
+.. _SMRT Analysis: http://smrtanalysis-docs/smrt_docs.html
+
+3. Software Overview
+====================
+
+3.1. Product Description
+------------------------
+
+As of the 3.0 release of SMRT Analysis, PacBio is embracing the industry standard
+`BAM format`_ (1) for (both aligned and unaligned) basecall data files. We have
+also formulated a BAM companion file format (.bam.pbi) enabling fast access to a
+richer set of per-read information as well as compatibility for software built
+around the legacy cmp.h5 format.
+
+The pbbam library provides components to create, query, & transform PacBio BAM
+data: sequence files and their associated indices. This includes a core C++
+library as well as bindings for additional programming languages.
+
+3.2. Product Functional Capabilities
+------------------------------------
+
+The library must be able to read and write BAM files that conform to the
+`PacBio BAM`_ specification (2). BAM records must be editable e.g. adding
+alignment information. Random access must be supported, whether by genomic
+region or by filtering record features. To this end, the library will be able to
+read, write, and create associated index files - both the standard BAM index
+(.bai) and the `PacBio BAM index`_ (.pbi) (3). In addition to working with
+individual files, datasets of related BAM files will be supported. These are
+described in a `DataSet XML`_ document. (4)
+
+3.3. User Characteristics
+-------------------------
+
++---------------------+--------------------------------------------------------+
+| **User Class/Role** | **User Knowledge and Skill Levels** |
++=====================+========================================================+
+| Developer | Competence in one or more programming languages |
+| | supported (C++, R, Python, C#). No knowledge of |
+| | molecular biology wet lab techniques required. |
++---------------------+--------------------------------------------------------+
+
+3.4. User Operations and Practices
+----------------------------------
+
+Developer users will interact with the software by incorporating the library
+into a client application.
+
+3.5. Operating Environment
+--------------------------
+
+The software is intended to be run in a Linux or OSX environment, with ideally 4
+or more cores.
+
+3.6. Design and Implementation Constraints
+------------------------------------------
+
+Currently there are no constraints outside the operating environment and speed
+requirements. In particular, as the library will be used for writing the BAM
+files coming off a Sequel instrument, it should be able to keep pace.
+
+3.7. Assumptions and Dependencies
+---------------------------------
+
+Input routines for the library will expect to receive files that conform to the
+`PacBio BAM`_ (2) or `DataSet XML`_ (4) specifications.
+
+The pbbam library depends on Boost, zlib, and htslib libraries.
+
+3.8. Other Software
+-------------------
+
+Output PacBio BAMs will be compatible with the `PacBio BAM`_ specification (2)
+and thus compatible with the general `BAM format`_ specification (1). This
+ensures that a wide variety of downstream tools can interact with data files.
+
+The software uses `CMake`_ as its build system.
+
+The core C++ API relies on the following 3rd party components:
+
+* `zlib`_
+* `htslib`_
+* `Boost`_ (header-only modules)
+
+Wrapper APIs for additional languages (Python, R, C#) are generated by `SWIG`_.
+
+API documentation is generated via `Doxygen`_.
+
+.. _CMake: https://cmake.org/
+.. _zlib: http://www.zlib.net/
+.. _htslib: https://github.com/samtools/htslib
+.. _Boost: http://www.boost.org/
+.. _SWIG: http://www.swig.org/
+.. _Doxygen: http://www.stack.nl/~dimitri/doxygen/
+
+4. External Interfaces
+======================
+
+4.1. User Interfaces
+--------------------
+
+N/A
+
+4.2. Software Interfaces
+------------------------
+
+pbbam will require the following software:
+
+* `htslib`_ & `zlib`_ - provides low-level handling of compressed BAM data
+* `Boost`_ - provides utility classes
+
+Incoming data from upstream components will be compliant with
+PacBio BAM format - see `PacBio BAM`_ specification (2) for more detail.
+
+4.3. Hardware Interfaces
+------------------------
+
+N/A
+
+4.4. Communications Interfaces
+------------------------------
+
+N/A
+
+5. Functional Requirements
+==========================
+
+5.1. Query BAM data by genomic region
+-----------------------------------------
+
+5.1.1. Description
+~~~~~~~~~~~~~~~~~~
+
+pbbam shall allow client applications to query data, limited to some genomic
+region of interest.
+
+5.1.2. Inputs
+~~~~~~~~~~~~~
+
+* BAM file(s) or DataSet XML
+* a standard index (.bai) for each source BAM file
+* genomic interval (e.g. "chr1:1000-2000")
+
+5.1.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Obtain an `htslib`_ "iterator" object for a given file and region. This will be
+wrapped by pbbam to hide the low-level nature of this type, as well as handling
+memory lifetime.
+
+5.1.4. Outputs
+~~~~~~~~~~~~~~
+
+Iterator providing access to individual BAM records from the input data sources,
+which are aligned to the requested genomic interval.
+
+For example:
+
+.. code:: c++
+
+ GenomicIntervalQuery query(interval, dataset);
+ for (const BamRecord& record : query) {
+ // ... do stuff ...
+ }
+
+
+5.1.5. Regulatory Compliance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+5.2. Query BAM data by filter criteria
+-----------------------------------------
+
+5.2.1. Description
+~~~~~~~~~~~~~~~~~~
+
+pbbam shall allow client applications to query data, limited to some filter
+criteria (e.g. only reads from ZMW hole number 200 with a read quality of >0.5).
+
+5.2.2. Inputs
+~~~~~~~~~~~~~
+
+* BAM file(s) or DataSet XML
+* a `PacBio BAM index`_ (.pbi) for each source BAM file
+* filters supported by data contained in the PBI
+
+5.2.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Query PBI file(s) for records that match the provided filter criteria. Merge
+contiguous runs of records into record blocks, to minimize seeks. Advancing the
+iterator either reads the next read from the current block or seeks to the next
+block and fetches the next record.
+
+5.2.4. Outputs
+~~~~~~~~~~~~~~
+
+Iterator providing access to individual BAM records from the input data sources,
+which satisfy the requested filter criteria.
+
+For example:
+
+.. code:: c++
+
+ PbiFilterQuery query(filter, dataset);
+ for (const BamRecord& record : query) {
+ // ... do stuff ...
+ }
+
+5.2.5. Regulatory Compliance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+5.3. Write PacBio BAM data
+------------------------------------------
+
+5.3.1. Description
+~~~~~~~~~~~~~~~~~~
+
+pbbam shall be able to write `PacBio BAM`_ files conforming to the specification.
+
+5.3.2. Inputs
+~~~~~~~~~~~~~
+
+* filename
+* header information
+* BAM records
+
+5.3.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Create file handle for the provided filename, output initial header information.
+As records are passed in, write to file. Upon completion, flush any buffers and
+close file handle.
+
+Multithreading, provided by `htslib`_, will be utilized where possible to speed
+up the compression process - often the main bottleneck of BAM throughput.
+
+5.3.4. Outputs
+~~~~~~~~~~~~~~
+
+BAM file conforming to the `PacBio BAM`_ specification.
+
+5.3.5. Regulatory Compliance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+5.4. Create PacBio BAM index file
+------------------------------------------
+
+5.4.1. Description
+~~~~~~~~~~~~~~~~~~
+
+Much of PacBio BAM data processing relies on the presence of a `PacBio BAM index`_
+file. pbbam shall be able to generate this file type for a `PacBio BAM`_ file.
+
+5.4.2. Inputs
+~~~~~~~~~~~~~
+
+`PacBio BAM`_ file
+
+5.4.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Read through the input BAM records, storing the values relevant to a PBI index.
+At end of file, write the index contents to a file and close.
+
+5.4.4. Outputs
+~~~~~~~~~~~~~~
+
+`PacBio BAM index`_ file
+
+5.4.5. Regulatory Compliance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+6. Non-Functional Requirements
+==============================
+
+6.1. Performance Requirements
+-----------------------------
+
+Since pbbam will be used to write all BAM files coming off a Sequel device, the
+library must keep pace with data generation requirements.
+
+**TODO: come back to this with hard numbers.**
+
+6.2. Safety Requirements
+------------------------
+
+N/A
+
+6.3. Security Requirements
+--------------------------
+
+N/A
+
+6.4. Quality Attributes
+-----------------------
+
+6.4.1. Availability
+~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+6.4.2. Integrity
+~~~~~~~~~~~~~~~~
+
+Inputs and outputs shall adhere to the PacBio BAM or DataSet XML specifications.
+Files that do not meet this requirement will raise exceptions and will not be
+accepted.
+
+6.4.3. Interoperability
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Inputs and outputs shall adhere to the PacBio BAM or DataSet XML specifications.
+
+6.4.4. Reliability
+~~~~~~~~~~~~~~~~~~
+
+The developed software shall meet the overall product reliability requirements.
+
+6.4.5. Robustness
+~~~~~~~~~~~~~~~~~
+
+pbbam will raise exceptions upon encountering failure cases, allowing client
+applications to recover or report the error to a UI.
+
+6.4.6. Usability
+~~~~~~~~~~~~~~~~
+
+pbbam shall have comprehensive API documentation, available both on- and offline.
+Further documentation will be provided for installation, API usage tips, etc.
+
+Raised exceptions shall carry as much information as possible so that client
+applications can respond with appropriate actions or display useful messages.
+
+6.4.7. Maintainability
+~~~~~~~~~~~~~~~~~~~~~~
+
+The source code of the software covered in this functional specification shall
+adhere to the PacBio `Software Style Guide`_ (5) work instruction, to guarantee
+high quality of code that facilitates maintainability.
+
+6.4.8. Customizability
+~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+6.5. Business Rules
+-------------------
+
+N/A
+
+6.6. Installation and Upgrade
+-----------------------------
+
+Installation and Upgrade of this software will be handled as part of the SMRT
+Analysis subsystem. See `SMRT Analysis`_ (6) specifications for more detail.
+
+Additionally, the library may be built independently, either from internal
+version control (Perforce) or from the public-facing Github repository. In
+either case, `CMake`_ is used to drive the build process.
+
+6.7. Administration
+-------------------
+
+N/A
+
+6.8. User Documentation
+-----------------------
+
+pbbam shall have comprehensive API documentation, available both on- and offline.
+Further documentation will be provided for installation, API usage tips, etc.
+
+The "offline" API documentation may be built directly from the source code, using
+`Doxygen`_. Online documentation will be generated via a continuous integration
+server, thus ensuring it is always pointing to the current codebase.
+
+7. High Level Design
+====================
+
+7.1. Top Level Context
+----------------------
+
+The pbbam library is intended to be linked in with client applications,
+providing programmatic access to data files.
+
+7.2. Use Cases
+--------------
+
+Primary use cases for pbbam include:
+
+* BAM file creation
+* BAM file query - iterable access to various subsets of data
+
+8. Detailed Design
+==================
+
+8.1. Structural Representation
+------------------------------
+
+ *image(s) here*
+
+8.2. Behavioral Representation
+------------------------------
+
+This section provides behavioral (dynamic) representation of how the
+elements of the system realize the required use cases.
+
+Describe how the significant subsystems and classes interact with each
+other to realize the architecturally significant use cases.
+
+Provide a link to a file containing Sequence Diagram or Activity Diagram, when applicable.
+The link may be provided with use of 'image' directive.
+
+Sequence Diagram shows one use case scenario, executed by class model,
+with sequence of operations over period of time (time increased from top
+to bottom). It shows interactions between objects, but does not show
+relationships between them.
+
+Activity Diagram is a virtual representation of the sequential flow and
+control logic of a set of related activities or actions. It is a type of
+flowchart, frequently called Swim Lane Diagram, because activities of
+each entity are presented within its swim lane.
+
+Note: You may use http://wsd tool to auto-generate a sequence diagram from
+a descriptive text file, save the diagram to the wsd site, get link to the image,
+and add this link to the document with use of 'image' directive.
+
+8.3. Information Storage
+------------------------
+
+pbbam software requires no persistent storage outside of availability of input
+and output during analysis.
+
+8.4. Technology Overview
+------------------------
+
+pbbam is implemented in C++11 and should perform as designed on any UNIX-like
+operating system (Linux distributions, Apple OSX, etc.).
+
+8.5. SOUP Components
+--------------------
+
+pbbam utilizes CMake for its build system. The C++ library uses the following
+3rd-party software components: Boost, htslib, & zlib. Wrappers for additional
+languages are generated using SWIG.
+
+8.6. Deployment and Configuration
+---------------------------------
+
+Please refer to `SMRT Analysis`_ (6) documentation.
+
+9. Automated Tests
+==================
+
+9.1. Unit Testing
+-----------------
+
+The library shall have unit tests for all classes & components.
+
+9.2. Performance Testing
+------------------------
+
+Unit tests may evaluate performance requirements as desired.
+
+9.3. Regression Testing
+-----------------------
+
+As its role is primarily in data I/O, pbbam has no "scientific quality/validity"
+metrics that would indicate a regression. Instead, passing its unit tests and
+end-to-end tests will indicate that a regression has not been introduced.
+
+These tests will be run after each check-in and nightly.
+
+10. Requirements Traceability Matrices
+======================================
+
+This section provides traces from requirements specified in PRD/DIR documents to the
+requirements covered in this functional specification, and from these
+functional requirements to corresponding Test Cases/Procedures.
+
+10.1. HPQC Functional Specifications
+------------------------------------
+
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+-------------------------------------------+
+| **PBI_ID** | **Name** | **Description** | **Comment** | **Metric** | **Owner** | **PRD/DIR Path** |
++=============+===========================+===================================================+=============+============+===========+===========================================+
+| 5.1 | Query BAM data by | pbbam shall allow client applications to query | | | dbarnett | |
+| | genomic region | data, limited to some genomic region of interest. | | | | |
+| | | | | | | |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+-------------------------------------------+
+| 5.2 | Query BAM data by | pbbam shall allow client applications to query | | | dbarnett | |
+| | filter criteria | data, limited to some filter criteria (e.g. only | | | | |
+| | | reads from ZMW hole number 200 with a read | | | | |
+| | | quality of >0.5). | | | | |
+| | | | | | | |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+-------------------------------------------+
+| 5.3 | Write PacBio BAM data | pbbam shall be able to write files conforming to | | | dbarnett | |
+| | | the `PacBio BAM`_ specification.  | | | | |
+| | | | | | | |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+-------------------------------------------+
+| 5.4 | Create PacBio BAM index | Much of PacBio BAM data processing relies on the | | | dbarnett | |
+| | file | presence of a `PacBio BAM index`_ file. pbbam | | | | |
+| | | shall be able to generate this file type for a | | | | |
+| | | `PacBio BAM`_ file. | | | | |
+| | | | | | | |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+-------------------------------------------+
+
+10.2. Automated Tests Coverage
+------------------------------
+
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| **FS Item** | **FS Item Title** | **Use Case Description** | **Test Case Name/ID** |
++=============+===========================+====================================================+==================================================================+
+| 5.1 | Query BAM data by | pbbam shall allow client applications to query | TODO |
+| | genomic region | data, limited to some genomic region of interest. | |
+| | | | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| 5.2 | Query BAM data by | pbbam shall allow client applications to query | TODO |
+| | filter criteria | data, limited to some filter criteria (e.g. only | |
+| | | reads from ZMW hole number 200 with a read | |
+| | | quality of >0.5). | |
+| | | | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| 5.3 | Write PacBio BAM data | pbbam shall be able to write files conforming to | TODO |
+| | | the `PacBio BAM`_ specification.  | |
+| | | | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| 5.4 | Create PacBio BAM index | Much of PacBio BAM data processing relies on the | TODO |
+| | file | presence of a `PacBio BAM index`_ file. pbbam | |
+| | | shall be able to generate this file type for a | |
+| | | `PacBio BAM`_ file. | |
+| | | | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+
--- /dev/null
+=============================================================
+**Pbbam Core API Software Design & Functional Specification**
+=============================================================
+| *Version 0.2*
+| *Pacific Biosciences Engineering Group*
+| *Oct 17, 2016*
+
+1. Revision History
+===================
+
++-------------+---------------+--------------------+---------------------------------+
+| **Date** | **Revision** | **Author(s)** | **Comments** |
++=============+===============+====================+=================================+
+| 01-29-2016 | 0.1 | Derek Barnett | Initial draft created |
+| | | | |
++-------------+---------------+--------------------+---------------------------------+
+| 10-17-2016 | 0.2 | Derek Barnett | Added behavioral representation |
+| | | | and structural representation |
+| | | | diagram |
++-------------+---------------+--------------------+---------------------------------+
+
+2. Introduction
+===============
+
+2.1. Document Specification Identifier
+--------------------------------------
+
++-----------------------------------+------------------------------------------+
+| **Document Specification Prefix** | **Description** |
++===================================+==========================================+
+| FS\_SA\_PBBAM\_ | Functional spec for pbbam |
++-----------------------------------+------------------------------------------+
+
+2.2. Purpose
+------------
+
+This document is intended to describe the requirements and interface of the pbbam
+library, which provides functionality for creating, querying, and editing PacBio
+BAM files and associated file formats.
+
+2.3. Scope of Document
+----------------------
+
+This document covers the expected usage of the pbbam library, as well as any
+desired or required performance characteristics with respect to quality or speed.
+
+This document does not provide installation instructions or API documentation.
+
+2.4. Glossary of Terms
+----------------------
+
+The table below specifies only terms specific to this document, and skips
+acronyms/terms that are specified in `Pacific Biosciences Software Glossary`_.
+
+.. _Pacific Biosciences Software Glossary: http://smrtanalysis-docs/pb_sw_glossary.html
+
++------------------+-----------------------------------------------------------+
+| **Acronym/Term** | **Description** |
++==================+===========================================================+
+| API | Application Programming Interface - a set of routines, |
+| | protocols, and tools for building software applications. |
+| | In this document, this will consist of one or more |
+| | cooperating libraries that specify data structures, |
+| | methods, etc. for use within a target programming |
+| | language. |
++------------------+-----------------------------------------------------------+
+| Client | An application that uses the library. |
++------------------+-----------------------------------------------------------+
+| I/O | Input/output of data. |
++------------------+-----------------------------------------------------------+
+
+2.5. References
+---------------
+
++-------------+------------------------------+--------------------------------------+
+| **Ref No.** | **Document Name, Link** | **Description** |
++=============+==============================+======================================+
+| (1) | `BAM format`_ | General SAM/BAM specification |
++-------------+------------------------------+--------------------------------------+
+| (2) | `PacBio BAM`_ | PacBio BAM specification |
++-------------+------------------------------+--------------------------------------+
+| (3) | `PacBio BAM index`_ | PacBio BAM index specification |
++-------------+------------------------------+--------------------------------------+
+| (4) | `DataSet XML`_ | PacBio DataSet XML specification |
++-------------+------------------------------+--------------------------------------+
+| (5) | `Software Style Guide`_ | PacBio coding standards |
++-------------+------------------------------+--------------------------------------+
+| (6) | `SMRT Analysis`_ | General SMRT Analysis infrastructure |
++-------------+------------------------------+--------------------------------------+
+
+.. _BAM format: https://samtools.github.io/hts-specs/SAMv1.pdf
+.. _PacBio BAM: http://pacbiofileformats.readthedocs.org/en/3.0/BAM.html
+.. _PacBio BAM index: http://pacbiofileformats.readthedocs.org/en/3.0/PacBioBamIndex.html
+.. _DataSet XML: https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/DataSet.rst
+.. _Software Style Guide: http://smrtanalysis-docs/_downloads/PBISoftwareStyleGuide.doc
+.. _SMRT Analysis: http://smrtanalysis-docs/smrt_docs.html
+
+3. Software Overview
+====================
+
+3.1. Software Module Description
+--------------------------------
+
+As of the 3.0 release of SMRT Analysis, PacBio is embracing the industry standard
+`BAM format`_ (1) for (both aligned and unaligned) basecall data files. We have
+also formulated a BAM companion file format (.bam.pbi) enabling fast access to a
+richer set of per-read information as well as compatibility for software built
+around the legacy cmp.h5 format.
+
+The pbbam library provides components to create, query, & transform PacBio BAM
+data: sequence files and their associated indices. This includes a core C++
+library as well as bindings for additional programming languages.
+
+3.2. Software Module Functional Capabilities
+--------------------------------------------
+
+The library must be able to read and write BAM files that conform to the
+`PacBio BAM`_ specification (2). BAM records must be editable e.g. adding
+alignment information. Random access must be supported, whether by genomic
+region or by filtering record features. To this end, the library will be able to
+read, write, and create associated index files - both the standard BAM index
+(.bai) and the `PacBio BAM index`_ (.pbi) (3). In addition to working with
+individual files, datasets of related BAM files will be supported. These are
+described in a `DataSet XML`_ document (4).
+
+3.3. User Characteristics
+-------------------------
+
++---------------------+--------------------------------------------------------+
+| **User Class/Role** | **User Knowledge and Skill Levels** |
++=====================+========================================================+
+| Developer | Competence in one or more programming languages |
+| | supported (C++, R, Python, C#). No knowledge of |
+| | molecular biology wet lab techniques required. |
++---------------------+--------------------------------------------------------+
+
+3.4. User Operations and Practices
+----------------------------------
+
+Developer users will interact with the software by incorporating the library
+into a client application.
+
+3.5. Operating Environment
+--------------------------
+
+The software is intended to be run in a Linux or OSX environment, with ideally 4
+or more cores.
+
+3.6. General Constraints
+------------------------
+
+Currently there are no constraints outside the operating environment and speed
+requirements. In particular, as the library will be used for writing the BAM
+files coming off a Sequel instrument, it should be able to keep pace.
+
+3.7. Assumptions and Dependencies
+---------------------------------
+
+Input routines for the library will expect to receive files that conform to the
+`PacBio BAM`_ (2) or `DataSet XML`_ (4) specifications.
+
+The pbbam library depends on Boost, zlib, and htslib libraries.
+
+3.8. Other Software
+-------------------
+
+Output PacBio BAMs will be compatible with the `PacBio BAM`_ specification (2)
+and thus compatible with the general `BAM format`_ specification (1). This
+ensures that a wide variety of downstream tools can interact with data files.
+
+The software uses `CMake`_ as its build system.
+
+The core C++ API relies on the following 3rd party components:
+
+* `zlib`_
+* `htslib`_
+* `Boost`_ (header-only modules)
+
+Wrapper APIs for additional languages (Python, R, C#) are generated by `SWIG`_.
+
+API documentation is generated via `Doxygen`_.
+
+.. _CMake: https://cmake.org/
+.. _zlib: http://www.zlib.net/
+.. _htslib: https://github.com/samtools/htslib
+.. _Boost: http://www.boost.org/
+.. _SWIG: http://www.swig.org/
+.. _Doxygen: http://www.stack.nl/~dimitri/doxygen/
+
+4. External Interfaces
+======================
+
+4.1. User Interfaces
+--------------------
+
+N/A
+
+4.2. Software Interfaces
+------------------------
+
+pbbam will require the following software:
+
+* `htslib`_ & `zlib`_ - provides low-level handling of compressed BAM data
+* `Boost`_ - provides utility classes
+
+Incoming data from upstream components will be compliant with
+PacBio BAM format - see `PacBio BAM`_ specification (2) for more detail.
+
+4.3. Hardware Interfaces
+------------------------
+
+N/A
+
+4.4. Communications Interfaces
+------------------------------
+
+N/A
+
+5. Functional Requirements
+==========================
+
+5.1. Query BAM data by genomic region
+-------------------------------------
+
+5.1.1. Description
+~~~~~~~~~~~~~~~~~~
+
+pbbam shall allow client applications to query data, limited to some genomic
+region of interest.
+
+5.1.2. Inputs
+~~~~~~~~~~~~~
+
+* BAM file(s) or DataSet XML
+* a standard index (.bai) for each source BAM file
+* genomic interval (e.g. "chr1:1000-2000")
+
+5.1.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Obtain an `htslib`_ "iterator" object for a given file and region. This will be
+wrapped by pbbam to hide the low-level nature of this type, as well as handling
+memory lifetime.
+
+5.1.4. Outputs
+~~~~~~~~~~~~~~
+
+Iterator providing access to individual BAM records from the input data sources,
+which are aligned to the requested genomic interval.
+
+For example:
+
+.. code:: c++
+
+ GenomicIntervalQuery query(interval, dataset);
+ for (const BamRecord& record : query) {
+ // ... use record data ...
+ }
+
+
+5.2. Query BAM data by filter criteria
+--------------------------------------
+
+5.2.1. Description
+~~~~~~~~~~~~~~~~~~
+
+pbbam shall allow client applications to query data, limited to some filter
+criteria (e.g. only reads from ZMW hole number 200 with a read quality of >0.5).
+
+5.2.2. Inputs
+~~~~~~~~~~~~~
+
+* BAM file(s) or DataSet XML
+* a `PacBio BAM index`_ (.pbi) for each source BAM file
+* filters supported by data contained in the PBI
+
+5.2.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Query PBI file(s) for records that match the provided filter criteria. Merge
+contiguous runs of records into record blocks, to minimize seeks. Advancing the
+iterator either reads the next record from the current block or seeks to the
+next block and fetches its first record.
+
+5.2.4. Outputs
+~~~~~~~~~~~~~~
+
+Iterator providing access to individual BAM records from the input data sources,
+which satisfy the requested filter criteria.
+
+For example:
+
+.. code:: c++
+
+ PbiFilterQuery query(filter, dataset);
+ for (const BamRecord& record : query) {
+ // ... do stuff ...
+ }
+
+
+5.3. Write PacBio BAM data
+--------------------------
+
+5.3.1. Description
+~~~~~~~~~~~~~~~~~~
+
+pbbam shall be able to write `PacBio BAM`_ files conforming to the specification.
+
+5.3.2. Inputs
+~~~~~~~~~~~~~
+
+* filename
+* header information
+* BAM records
+
+5.3.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Create file handle for the provided filename, output initial header information.
+As records are passed in, write to file. Upon completion, flush any buffers and
+close file handle.
+
+Multithreading, provided by `htslib`_, will be utilized where possible to speed
+up the compression process - often the main bottleneck of BAM throughput.
+
+5.3.4. Outputs
+~~~~~~~~~~~~~~
+
+BAM file conforming to the `PacBio BAM`_ specification.
+
+5.4. Create PacBio BAM index file
+---------------------------------
+
+5.4.1. Description
+~~~~~~~~~~~~~~~~~~
+
+Much of PacBio BAM data processing relies on the presence of a `PacBio BAM index`_
+file. pbbam shall be able to generate this file type for a `PacBio BAM`_ file.
+
+5.4.2. Inputs
+~~~~~~~~~~~~~
+
+`PacBio BAM`_ file
+
+5.4.3. Processing
+~~~~~~~~~~~~~~~~~
+
+Read through the input BAM records, storing the values relevant to a PBI index.
+At end of file, write the index contents to a file and close.
+
+5.4.4. Outputs
+~~~~~~~~~~~~~~
+
+`PacBio BAM index`_ file
+
+6. Non-Functional Requirements
+==============================
+
+6.1. Performance Requirements
+-----------------------------
+
+Since pbbam will be used to write all BAM files coming off a Sequel instrument, the
+library must keep pace with data generation requirements.
+
+6.2. Safety Requirements
+------------------------
+
+N/A
+
+6.3. Security Requirements
+--------------------------
+
+N/A
+
+6.4. Quality Attributes
+-----------------------
+
+6.4.1. Availability
+~~~~~~~~~~~~~~~~~~~
+
+The developed software shall meet the overall product availability requirements.
+
+6.4.2. Data Integrity
+~~~~~~~~~~~~~~~~~~~~~
+
+Inputs and outputs shall adhere to the PacBio BAM or DataSet XML specifications.
+Files that do not meet this requirement will be rejected, and an exception will
+be raised.
+
+6.4.3. Interoperability
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Inputs and outputs shall adhere to the PacBio BAM or DataSet XML specifications.
+
+6.4.4. Reliability
+~~~~~~~~~~~~~~~~~~
+
+The developed software shall meet the overall product reliability requirements.
+
+6.4.5. Robustness
+~~~~~~~~~~~~~~~~~
+
+pbbam will raise exceptions upon encountering failure cases, allowing client
+applications to recover or report the error to a UI.
+
+6.4.6. Usability
+~~~~~~~~~~~~~~~~
+
+pbbam shall have comprehensive API documentation, available both on- and offline.
+Further documentation will be provided for installation, API usage tips, etc.
+
+Raised exceptions shall carry as much information as possible so that client
+applications can respond with appropriate actions or display useful messages.
+
+6.4.7. Maintainability
+~~~~~~~~~~~~~~~~~~~~~~
+
+The source code of the software covered in this functional specification shall
+adhere to the PacBio `Software Style Guide`_ (5) work instruction, to guarantee
+high quality of code that facilitates maintainability.
+
+6.4.8. Customizability
+~~~~~~~~~~~~~~~~~~~~~~
+
+N/A
+
+6.4.9. Compatibility
+~~~~~~~~~~~~~~~~~~~~
+
+pbbam shall support backward compatibility of the API and BAM format versions
+in order not to break existing clients.
+
+6.5. Business Rules
+-------------------
+
+N/A
+
+6.6. Compliance Requirements
+----------------------------
+
+N/A
+
+6.7. Alarms and Error Handling
+------------------------------
+
+Raised exceptions shall carry as much information as possible so that client
+applications can respond with appropriate actions or display useful messages.
+
+6.8. Persistence Requirements
+-----------------------------
+
+pbbam software requires no persistent storage outside of availability of input
+and output during analysis.
+
+6.9. Installation and Upgrade
+-----------------------------
+
+Installation and Upgrade of this software will be handled as part of the SMRT
+Analysis subsystem. See `SMRT Analysis`_ (6) specifications for more detail.
+
+Additionally, the library may be built independently, either from internal
+version control (Perforce) or from the public-facing Github repository. In
+either case, `CMake`_ is used to drive the build process.
+
+6.10. Administration and Maintenance
+------------------------------------
+
+N/A
+
+6.11. User Documentation
+------------------------
+
+pbbam shall have comprehensive API documentation, available both on- and offline.
+Further documentation will be provided for installation, API usage tips, etc.
+
+The "offline" API documentation may be built directly from the source code, using
+`Doxygen`_. Online documentation will be generated via a continuous integration
+server, thus ensuring it is always pointing to the current codebase.
+
+7. High Level Design
+====================
+
+7.1. Top Level Context
+----------------------
+
+The pbbam library is intended to be linked in with client applications,
+providing programmatic access to data files.
+
+7.2. Use Cases
+--------------
+
+Primary use cases for pbbam include:
+
+* BAM file creation
+* BAM file query - iterable access to various subsets of data
+
+8. Detailed Design
+==================
+
+8.1. Structural Representation
+------------------------------
+
+.. image:: ./pbbam_structure.png
+
+8.2. Behavioral Representation
+------------------------------
+
+The typical access pattern involves a client query against BAM data, optionally
+described in DataSet XML. The query may involve some number of filter criteria.
+
+pbbam queries the associated index files (``*.pbi``) to pre-determine which records
+pass filtering criteria and where they reside on disk. The client code is given
+an iterable object, such that each iteration of the main access loop returns a
+valid BAM record for analysis, modification, etc.
+
+8.3. Information Storage
+------------------------
+
+pbbam software requires no persistent storage outside of availability of input
+and output during analysis.
+
+8.4. Technology Overview
+------------------------
+
+pbbam is implemented in C++11 and should perform as designed on any UNIX-like
+operating system (Linux distributions, Apple OSX, etc.).
+
+8.5. SOUP Components
+--------------------
+
+pbbam utilizes CMake for its build system. The C++ library uses the following
+3rd-party software components: `Boost`_, `htslib`_, & `zlib`_. Wrappers for additional
+languages are generated using SWIG.
+
+8.6. Deployment and Configuration
+---------------------------------
+
+Please refer to the `SMRT Analysis`_ (6) documentation.
+
+9. Automated Tests
+==================
+
+9.1. Unit Testing
+-----------------
+
+The library shall have unit tests for all classes & components.
+
+9.2. Performance Testing
+------------------------
+
+Unit tests may evaluate performance requirements as desired.
+
+9.3. Regression Testing
+-----------------------
+
+As its role is primarily in data I/O, pbbam has no "scientific quality/validity"
+metrics that would indicate a regression. Instead, passing its unit tests and
+end-to-end tests will indicate that a regression has not been introduced.
+
+These tests will be run after each check-in and nightly.
+
+10. Requirements Traceability Matrices
+======================================
+
+This section provides traces from requirements specified in PRD/DIR documents to the
+requirements covered in this functional specification, and from these
+functional requirements to corresponding Test Cases/Procedures.
+
+10.1. HPQC Functional Specifications
+------------------------------------
+
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+--------------------------------------------------+
+| **PBI_ID** | **Name** | **Description** | **Comment** | **Metric** | **Owner** | **PRD/DIR Path** |
++=============+===========================+===================================================+=============+============+===========+==================================================+
+| 5.1 | Query BAM data by | pbbam shall allow client applications to query | | Yes | dbarnett | \\DIR\\Functionality\\Software\Common\APIs\\ |
+| | genomic region | data, limited to some genomic region of interest. | | | | Software shall provide an API to allow 3rd |
+| | | | | | | party software to extract all run information |
+| | | | | | | including summary reports and locations |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+--------------------------------------------------+
+| 5.2 | Query BAM data by | pbbam shall allow client applications to query | | Yes | dbarnett | \\DIR\\Functionality\\Software\Common\APIs\\ |
+| | filter criteria | data, limited to some filter criteria (e.g. only | | | | Software shall provide an API to allow 3rd |
+| | | reads from ZMW hole number 200 with a read | | | | party software to extract all run information |
+| | | quality of >0.5). | | | | including summary reports and locations |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+--------------------------------------------------+
+| 5.3 | Write PacBio BAM data | pbbam shall be able to write files conforming to | | Yes | dbarnett | \\DIR\\Functionality\\Software\\PostProcessing\\ |
+| | | the `PacBio BAM`_ specification. | | | | Software shall provide base files including |
+| | | | | | | kinetic information in industry standard format |
+| | | | | | | such as SAM/BAM using current specifications |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+--------------------------------------------------+
+| 5.4 | Create PacBio BAM index | Much of PacBio BAM data processing relies on the | | Yes | dbarnett | \\DIR\\Functionality\\Software\\PostProcessing\\ |
+| | file | presence of a `PacBio BAM index`_ file. pbbam | | | | Software shall provide base files including |
+| | | shall be able to generate this file type for a | | | | kinetic information in industry standard format |
+| | | `PacBio BAM`_ file. | | | | such as SAM/BAM using current specifications |
++-------------+---------------------------+---------------------------------------------------+-------------+------------+-----------+--------------------------------------------------+
+
+10.2. Automated Tests Coverage
+------------------------------
+
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| **FS Item** | **FS Item Title** | **Use Case Description** | **Test Case Name/ID** |
++=============+===========================+====================================================+==================================================================+
+| 5.1 | Query BAM data by | pbbam shall allow client applications to query | See section 9.1. Unit Testing. |
+| | genomic region | data, limited to some genomic region of interest. | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| 5.2 | Query BAM data by | pbbam shall allow client applications to query | See section 9.1. Unit Testing. |
+| | filter criteria | data, limited to some filter criteria (e.g. only | |
+| | | reads from ZMW hole number 200 with a read | |
+| | | quality of >0.5). | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| 5.3 | Write PacBio BAM data | pbbam shall be able to write files conforming to | See section 9.1. Unit Testing. |
+| | | the `PacBio BAM`_ specification. | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+| 5.4 | Create PacBio BAM index | Much of PacBio BAM data processing relies on the | See section 9.1. Unit Testing. |
+| | file | presence of a `PacBio BAM index`_ file. pbbam | |
+| | | shall be able to generate this file type for a | |
+| | | `PacBio BAM`_ file. | |
++-------------+---------------------------+----------------------------------------------------+------------------------------------------------------------------+
+
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Accuracy.h
+/// \brief Defines the Accuracy class.
+//
+// Author: Derek Barnett
+
+#ifndef ACCURACY_H
+#define ACCURACY_H
+
+#include "pbbam/Config.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Represents the expected accuracy of a BamRecord as a single
+/// floating-point value.
+///
+/// Stored values are clamped so that they always fall within [0,1]
+/// (see Accuracy::MIN and Accuracy::MAX).
+///
+class PBBAM_EXPORT Accuracy
+{
+public:
+ static const float MIN; ///< Lower bound of the valid accuracy range [0.0]
+ static const float MAX; ///< Upper bound of the valid accuracy range [1.0]
+
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// Builds an Accuracy object from a plain floating-point number.
+ ///
+ /// \note This ctor is deliberately not \b explicit, so that an Accuracy
+ /// can be used as easily as possible in numeric operations. The
+ /// type exists mainly to ensure the acceptable range is respected.
+ ///
+ Accuracy(float accuracy);
+ Accuracy(const Accuracy& other);
+ ~Accuracy();
+
+ /// \}
+
+ /// \returns the stored accuracy as a float primitive
+ operator float() const;
+
+private:
+ float accuracy_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/Accuracy.inl"
+
+#endif // ACCURACY_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file AlignmentPrinter.h
+/// \brief Defines the AlignmentPrinter class.
+//
+// Author: Armin Töpfer
+
+#ifndef ALIGNMENTPRINTER_H
+#define ALIGNMENTPRINTER_H
+
+#include <memory>
+#include <string>
+#include "pbbam/BamRecord.h"
+#include "pbbam/IndexedFastaReader.h"
+#include "pbbam/Orientation.h"
+
+namespace PacBio {
+namespace BAM {
+
+class BamRecord;
+
+/// \brief The AlignmentPrinter class "pretty-prints" an alignment with respect
+/// to its associated reference sequence.
+///
+/// The printer is move-only: copy construction and copy assignment are
+/// deleted, while move construction and move assignment are defaulted.
+///
+/// Example output:
+/// \verbinclude plaintext/AlignmentPrinterOutput.txt
+///
+class AlignmentPrinter
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// Constructs the alignment printer with an associated FASTA file reader.
+ ///
+ /// \param[in] ifr FASTA reader
+ ///
+ /// \throws std::runtime_error if FASTA file cannot be opened for reading.
+ ///
+ AlignmentPrinter(const IndexedFastaReader& ifr);
+
+ AlignmentPrinter(void) = delete;
+ AlignmentPrinter(const AlignmentPrinter&) = delete;
+ AlignmentPrinter(AlignmentPrinter&&) = default;
+ AlignmentPrinter& operator=(const AlignmentPrinter&) = delete;
+ AlignmentPrinter& operator=(AlignmentPrinter&&) = default;
+ ~AlignmentPrinter(void) = default;
+
+ /// \}
+
+public:
+ /// \name Printing
+ /// \{
+
+ /// Pretty-prints an aligned BamRecord to std::string.
+ ///
+ /// \note The current implementation includes ANSI escape sequences for
+ /// coloring terminal output. Future versions of this method will
+ /// likely make this optional.
+ ///
+ /// \param[in] record aligned record to print
+ /// \param[in] orientation orientation used for the printed output
+ /// (defaults to Orientation::GENOMIC)
+ ///
+ /// \returns formatted string containing the alignment and summary
+ /// information
+ ///
+ std::string Print(const BamRecord& record,
+ const Orientation orientation = Orientation::GENOMIC);
+
+ /// \}
+
+private:
+ // Intentionally non-const: a const std::unique_ptr member can be neither
+ // moved from nor assigned to, which would make the defaulted move
+ // constructor and move assignment operator above implicitly deleted.
+ std::unique_ptr<IndexedFastaReader> ifr_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // ALIGNMENTPRINTER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BaiIndexedBamReader.h
+/// \brief Defines the BaiIndexedBamReader class.
+//
+// Author: Derek Barnett
+
+#ifndef BAIINDEXEDBAMREADER_H
+#define BAIINDEXEDBAMREADER_H
+
+#include "pbbam/BamReader.h"
+#include "pbbam/BamFile.h"
+#include "pbbam/GenomicInterval.h"
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { struct BaiIndexedBamReaderPrivate; }
+
+/// \brief Read-only iterator-style reader over %BAM records, restricted to a
+/// particular genomic interval.
+///
+/// Random access is provided via the SAM/BAM standard index (*.bai).
+///
+class PBBAM_EXPORT BaiIndexedBamReader : public BamReader
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates a %BAM reader limited to a genomic interval, opening the
+ /// %BAM file by name.
+ ///
+ /// Every read overlapping the interval will be available.
+ ///
+ /// \param[in] interval GenomicInterval that bounds iteration
+ /// \param[in] filename input %BAM filename
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.bai) fails to open
+ /// for reading, or if the interval is invalid
+ ///
+ BaiIndexedBamReader(const GenomicInterval& interval,
+ const std::string& filename);
+
+ /// \brief Creates a %BAM reader limited to a genomic interval, from an
+ /// existing BamFile.
+ ///
+ /// Every read overlapping the interval will be available.
+ ///
+ /// \param[in] interval GenomicInterval that bounds iteration
+ /// \param[in] bamFile input BamFile object
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.bai) fails to open
+ /// for reading, or if the interval is invalid
+ ///
+ BaiIndexedBamReader(const GenomicInterval& interval,
+ const BamFile& bamFile);
+
+ /// \brief Creates a %BAM reader limited to a genomic interval, from an
+ /// rvalue BamFile.
+ ///
+ /// Every read overlapping the interval will be available.
+ ///
+ /// \param[in] interval GenomicInterval that bounds iteration
+ /// \param[in] bamFile input BamFile object (rvalue overload)
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.bai) fails to open
+ /// for reading, or if the interval is invalid
+ ///
+ BaiIndexedBamReader(const GenomicInterval& interval,
+ BamFile&& bamFile);
+
+ /// \}
+
+public:
+ /// \name Random-Access
+ /// \{
+
+ /// \returns the GenomicInterval currently in use by this reader
+ const GenomicInterval& Interval() const;
+
+ /// \brief Applies a new genomic interval to the reader.
+ ///
+ /// \param[in] interval new GenomicInterval to bound iteration
+ /// \returns reference to this reader
+ ///
+ BaiIndexedBamReader& Interval(const GenomicInterval& interval);
+
+ /// \}
+
+protected:
+ // NOTE(review): presumably the raw-read hook invoked by BamReader;
+ // confirm against the BamReader declaration.
+ int ReadRawData(BGZF* bgzf, bam1_t* b);
+
+private:
+ // Opaque implementation details, defined outside this header.
+ std::unique_ptr<internal::BaiIndexedBamReaderPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // BAIINDEXEDBAMREADER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamFile.h
+/// \brief Defines the BamFile class.
+//
+// Author: Derek Barnett
+
+#ifndef BAMFILE_H
+#define BAMFILE_H
+
+#include "pbbam/Config.h"
+#include "pbbam/BamHeader.h"
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { class BamFilePrivate; }
+
+/// \brief The BamFile class represents a %BAM file.
+///
+/// It provides access to header metadata and methods for finding/creating
+/// associated index files.
+///
+class PBBAM_EXPORT BamFile
+{
+public:
+
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates a BamFile object on the provided \p filename &
+ /// loads header information.
+ ///
+ /// \param[in] filename %BAM filename
+ /// \throws std::exception on failure to open %BAM file for reading
+ ///
+ BamFile(const std::string& filename);
+
+ BamFile(const BamFile& other);
+ BamFile(BamFile&& other);
+ BamFile& operator=(const BamFile& other);
+ BamFile& operator=(BamFile&& other);
+ ~BamFile(void);
+
+ /// \}
+
+public:
+
+ /// \name Index & Filename Methods
+ /// \{
+
+ /// \brief Creates a ".pbi" file for this %BAM file.
+ ///
+ /// \note Existing index file will be overwritten. Use
+ /// EnsurePacBioIndexExists() if this is not desired.
+ ///
+ /// \throws if PBI file could not be properly created and/or
+ /// written to disk
+ ///
+ void CreatePacBioIndex(void) const;
+
+ /// \brief Creates a ".bai" file for this %BAM file.
+ ///
+ /// \note Existing index file will be overwritten. Use
+ /// EnsureStandardIndexExists() if this is not desired.
+ ///
+ /// \throws if BAI file could not be properly created (e.g. this
+ /// %BAM is not coordinate-sorted) or could not be written to disk
+ ///
+ void CreateStandardIndex(void) const;
+
+ /// \brief Creates a ".pbi" file if one does not already exist (existence
+ /// check only; see the note below).
+ ///
+ /// Equivalent to:
+ /// \code{.cpp}
+ /// if (!file.PacBioIndexExists())
+ /// file.CreatePacBioIndex();
+ /// \endcode
+ ///
+ /// \note As of v0.4.2, no timestamp check is performed. Previously, the
+ /// index was also required to pass an additional timestamp check.
+ ///
+ /// \throws if PBI file could not be properly created and/or
+ /// written to disk
+ ///
+ void EnsurePacBioIndexExists(void) const;
+
+ /// \brief Creates a ".bai" file if one does not already exist (existence
+ /// check only; see the note below).
+ ///
+ /// Equivalent to:
+ /// \code{.cpp}
+ /// if (!file.StandardIndexExists())
+ /// file.CreateStandardIndex();
+ /// \endcode
+ ///
+ /// \note As of v0.4.2, no timestamp check is performed.
+ ///
+ /// \throws if BAI file could not be properly created (e.g. this
+ /// %BAM is not coordinate-sorted) or could not be written to disk
+ ///
+ void EnsureStandardIndexExists(void) const;
+
+ /// \returns %BAM filename
+ std::string Filename(void) const;
+
+ /// \returns true if %BAM file has EOF marker (empty BGZF block).
+ /// \note TODO(review): result for streamed input (filename: "-") unconfirmed.
+ bool HasEOF(void) const;
+
+ /// \returns true if ".pbi" exists (timestamp not checked, see PacBioIndexIsNewer())
+ bool PacBioIndexExists(void) const;
+
+ /// \returns filename of %PacBio index file (".pbi")
+ /// \note No guarantee is made on the existence of this file.
+ /// This method simply returns the expected filename.
+ std::string PacBioIndexFilename(void) const;
+
+ /// \returns true if ".pbi" has a more recent timestamp than this file
+ bool PacBioIndexIsNewer(void) const;
+
+ /// \returns true if ".bai" exists
+ bool StandardIndexExists(void) const;
+
+ /// \returns filename of standard index file (".bai"). No guarantee is made
+ /// on the existence of this file; this is simply the expected filename.
+ std::string StandardIndexFilename(void) const;
+
+ /// \returns true if ".bai" has a more recent timestamp than this file
+ bool StandardIndexIsNewer(void) const;
+
+ /// \}
+
+public:
+ /// \name File Header Data
+ /// \{
+
+ /// \returns true if header metadata has this reference name
+ bool HasReference(const std::string& name) const;
+
+ /// \returns const reference to BamHeader containing the file's metadata
+ const BamHeader& Header(void) const;
+
+ /// \returns true if file is a %PacBio %BAM file (i.e. has non-empty version
+ /// associated with header "pb" tag)
+ bool IsPacBioBAM(void) const;
+
+ /// \returns ID for reference \p name (can be used for e.g.
+ /// GenomicIntervalQuery), or -1 if not found
+ int ReferenceId(const std::string& name) const;
+
+ /// \returns name of reference matching \p id, empty string if not found
+ std::string ReferenceName(const int id) const;
+
+ /// \returns length of requested reference \p name. 0 if not found
+ uint32_t ReferenceLength(const std::string& name) const;
+
+ /// \returns length of requested reference \p id. 0 if not found
+ uint32_t ReferenceLength(const int id) const;
+
+ /// \}
+
+public:
+ /// \name Additional Attributes
+ /// \{
+
+ /// \returns virtual offset of first alignment. Intended mostly for internal
+ /// use. Note that this is a BGZF \b virtual offset, not a
+ /// 'normal' file position.
+ ///
+ int64_t FirstAlignmentOffset(void) const;
+
+ /// \}
+
+private:
+ std::unique_ptr<internal::BamFilePrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // BAMFILE_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamHeader.h
+/// \brief Defines the BamHeader class.
+//
+// Author: Derek Barnett
+
+#ifndef BAMHEADER_H
+#define BAMHEADER_H
+
+#include "pbbam/Config.h"
+#include "pbbam/ProgramInfo.h"
+#include "pbbam/ReadGroupInfo.h"
+#include "pbbam/SequenceInfo.h"
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { class BamHeaderPrivate; }
+
+/// \brief The BamHeader class represents the header section of the %BAM file.
+///
+/// It provides metadata about the file including file version, reference
+/// sequences, read groups, comments, etc.
+///
+/// A BamHeader may be fetched from a BamFile to view an existing file's
+/// metadata. Or one may be created/edited for use with writing to a new file
+/// (via BamWriter).
+///
+/// \note A particular BamHeader is likely to be re-used in lots of places
+/// throughout the library, for read-only purposes. For this reason, even
+/// though a BamHeader may be returned by value, it is essentially a thin
+/// wrapper for a shared-pointer to the actual data. This means, though,
+/// that if you need to edit an existing BamHeader for use with a
+/// BamWriter, please consider using BamHeader::DeepCopy. Otherwise any
+/// modifications will affect all BamHeaders that are sharing its
+/// underlying data.
+///
+class PBBAM_EXPORT BamHeader
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ BamHeader(void);
+ BamHeader(const std::string& samHeaderText);
+ BamHeader(const BamHeader& other);
+ BamHeader(BamHeader&& other);
+ BamHeader& operator=(const BamHeader& other);
+ BamHeader& operator=(BamHeader&& other);
+ ~BamHeader(void);
+
+ /// \brief Detaches underlying data from the shared-pointer, returning an
+ /// independent copy of the header contents.
+ ///
+ /// This ensures that any modifications to the newly returned BamHeader do
+ /// not affect other BamHeader objects that were sharing its underlying data.
+ ///
+ BamHeader DeepCopy(void) const;
+
+ /// \}
+
+public:
+ /// \name Operators
+ /// \{
+
+ /// \brief Merges another header with this one.
+ ///
+ /// Headers must be compatible for merging. This means that their Version,
+ /// SortOrder, PacBioBamVersion (and in the case of aligned BAM data,
+ /// Sequences) must all match. If not, an exception will be thrown.
+ ///
+ /// \param[in] other header to merge with this one
+ /// \returns reference to this header
+ ///
+ /// \throws std::runtime_error if the headers are not compatible
+ ///
+ BamHeader& operator+=(const BamHeader& other);
+
+ /// \brief Creates a new, merged header.
+ ///
+ /// Headers must be compatible for merging. This means that their Version,
+ /// SortOrder, PacBioBamVersion (and in the case of aligned BAM data,
+ /// Sequences) must all match. If not, an exception will be thrown.
+ ///
+ /// Both original headers (this header and \p other) will not be modified.
+ ///
+ /// \param[in] other header to merge with this one
+ /// \returns merged header
+ ///
+ /// \throws std::runtime_error if the headers are not compatible
+ ///
+ BamHeader operator+(const BamHeader& other) const;
+
+ /// \}
+
+public:
+ /// \name General Attributes
+ /// \{
+
+ /// \returns the %PacBio %BAM version number (\@HD:pb)
+ ///
+ /// \note This is different from the SAM/BAM version number
+ /// \sa BamHeader::Version.
+ ///
+ std::string PacBioBamVersion(void) const;
+
+ /// \returns the sort order used
+ ///
+ /// Valid values: "unknown", "unsorted", "queryname", or "coordinate"
+ ///
+ std::string SortOrder(void) const;
+
+ /// \returns the SAM/BAM version number (\@HD:VN)
+ ///
+ /// \note This is different from the %PacBio %BAM version number
+ /// \sa BamHeader::PacBioBamVersion
+ ///
+ std::string Version(void) const;
+
+ /// \}
+
+public:
+ /// \name Read Groups
+ /// \{
+
+ /// \returns true if the header contains a read group with \p id (\@RG:ID)
+ bool HasReadGroup(const std::string& id) const;
+
+ /// \returns a ReadGroupInfo object representing the read group matching
+ /// \p id (\@RG:ID)
+ /// \throws std::runtime_error if \p id is unknown
+ ///
+ ReadGroupInfo ReadGroup(const std::string& id) const;
+
+ /// \returns vector of read group IDs listed in this header
+ std::vector<std::string> ReadGroupIds(void) const;
+
+ /// \returns vector of ReadGroupInfo objects, representing all read groups
+ /// listed in this header
+ ///
+ std::vector<ReadGroupInfo> ReadGroups(void) const;
+
+ /// \}
+
+public:
+ /// \name Sequences
+ /// \{
+
+ /// \returns true if header contains a sequence with \p name (\@SQ:SN)
+ bool HasSequence(const std::string& name) const;
+
+ /// \returns number of sequences (\@SQ entries) stored in this header
+ size_t NumSequences(void) const;
+
+ /// \returns numeric ID for sequence matching \p name (\@SQ:SN)
+ ///
+ /// This is the numeric ID used elsewhere throughout the API.
+ ///
+ /// \throws std::runtime_error if \p name is unknown
+ /// \sa BamReader::ReferenceId, PbiReferenceIdFilter,
+ /// PbiRawMappedData::tId_
+ ///
+ int32_t SequenceId(const std::string& name) const;
+
+ /// \returns the length of the sequence (\@SQ:LN, e.g. chromosome length),
+ /// as a string, at index \p id
+ ///
+ /// \sa SequenceInfo::Length, BamHeader::SequenceId
+ ///
+ std::string SequenceLength(const int32_t id) const;
+
+ /// \returns the name of the sequence (\@SQ:SN) at index \p id
+ ///
+ /// \sa SequenceInfo::Name, BamHeader::SequenceId
+ ///
+ std::string SequenceName(const int32_t id) const;
+
+ /// \returns vector of sequence names (\@SQ:SN) stored in this header
+ ///
+ /// Position in the vector is equivalent to SequenceId.
+ ///
+ std::vector<std::string> SequenceNames(void) const;
+
+ /// \returns SequenceInfo object at index \p id
+ ///
+ /// \throws std::out_of_range if \p id is an invalid or unknown index
+ /// \sa BamHeader::SequenceId
+ ///
+ SequenceInfo Sequence(const int32_t id) const;
+
+ /// \returns SequenceInfo for the sequence matching \p name
+ SequenceInfo Sequence(const std::string& name) const;
+
+ /// \returns vector of SequenceInfo objects representing the sequences
+ /// (\@SQ entries) stored in this header
+ ///
+ std::vector<SequenceInfo> Sequences(void) const;
+
+ /// \}
+
+public:
+ /// \name Programs
+ /// \{
+
+ /// \returns true if this header contains a program entry with ID (\@PG:ID)
+ /// matching \p id
+ ///
+ bool HasProgram(const std::string& id) const;
+
+ /// \returns ProgramInfo object for the program entry matching \p id
+ /// \throws std::runtime_error if \p id is unknown
+ ///
+ ProgramInfo Program(const std::string& id) const;
+
+ /// \returns vector of program IDs (\@PG:ID)
+ std::vector<std::string> ProgramIds(void) const;
+
+ /// \returns vector of ProgramInfo objects representing program entries
+ /// (\@PG) stored in this header
+ ///
+ std::vector<ProgramInfo> Programs(void) const;
+
+ /// \}
+
+public:
+ /// \name Comments
+ /// \{
+
+ /// \returns vector of comment (\@CO) strings
+ std::vector<std::string> Comments(void) const;
+
+ /// \}
+
+public:
+ /// \name Conversion Methods
+ /// \{
+
+ /// \returns SAM-header-formatted string representing this header's data
+ std::string ToSam(void) const;
+
+ /// \}
+
+public:
+
+ /// \name General Attributes
+ /// \{
+
+ /// \brief Sets this header's PacBioBAM version number (\@HD:pb).
+ ///
+ /// \returns reference to this object
+ /// \throws std::runtime_error if version number cannot be parsed or
+ /// is less than the minimum version allowed.
+ ///
+ BamHeader& PacBioBamVersion(const std::string& version);
+
+ /// \brief Sets this header's sort order label (\@HD:SO).
+ ///
+ /// Valid values: "unknown", "unsorted", "queryname", or "coordinate"
+ ///
+ /// \returns reference to this object
+ ///
+ BamHeader& SortOrder(const std::string& order);
+
+ /// \brief Sets this header's SAM/BAM version number (\@HD:VN).
+ ///
+ /// \returns reference to this object
+ ///
+ BamHeader& Version(const std::string& version);
+
+ /// \}
+
+public:
+ /// \name Read Groups
+ /// \{
+
+ /// \brief Appends a read group entry (\@RG) to this header.
+ ///
+ /// \returns reference to this object
+ ///
+ BamHeader& AddReadGroup(const ReadGroupInfo& readGroup);
+
+ /// \brief Removes all read group entries from this header.
+ ///
+ /// \returns reference to this object
+ ///
+ BamHeader& ClearReadGroups(void);
+
+ /// \brief Replaces this header's list of read group entries with those in
+ /// \p readGroups.
+ ///
+ /// \returns reference to this object
+ ///
+ BamHeader& ReadGroups(const std::vector<ReadGroupInfo>& readGroups);
+
+ /// \}
+
+public:
+ /// \name Sequences
+ /// \{
+
+ /// \brief Appends a sequence entry (\@SQ) to this header.
+ ///
+ /// \returns reference to this object
+ ///
+ BamHeader& AddSequence(const SequenceInfo& sequence);
+
+ /// \brief Removes all sequence entries from this header.
+ ///
+ /// \returns reference to this object
+ ///
+ BamHeader& ClearSequences(void);
+
+ /// \brief Replaces this header's list of sequence entries with those in
+ /// \p sequences.
+ ///
+ /// \returns reference to this object
+ ///
+ BamHeader& Sequences(const std::vector<SequenceInfo>& sequences);
+
+ /// \}
+
+public:
+ /// \name Programs
+ /// \{
+
+ /// \brief Appends a program entry (\@PG) to this header.
+ ///
+ /// \returns reference to this object
+ ///
+ BamHeader& AddProgram(const ProgramInfo& pg);
+
+ /// \brief Removes all program entries from this header.
+ ///
+ /// \returns reference to this object
+ ///
+ BamHeader& ClearPrograms(void);
+
+ /// \brief Replaces this header's list of program entries with those in
+ /// \p programs.
+ ///
+ /// \returns reference to this object
+ ///
+ BamHeader& Programs(const std::vector<ProgramInfo>& programs);
+
+ /// \}
+
+public:
+ /// \name Comments
+ /// \{
+
+ /// \brief Appends a comment (\@CO) to this header.
+ ///
+ /// \returns reference to this object
+ ///
+ BamHeader& AddComment(const std::string& comment);
+
+ /// \brief Removes all comments from this header.
+ ///
+ /// \returns reference to this object
+ ///
+ BamHeader& ClearComments(void);
+
+ /// \brief Replaces this header's list of comments with those in \p comments.
+ ///
+ /// \returns reference to this object
+ ///
+ BamHeader& Comments(const std::vector<std::string>& comments);
+
+ /// \}
+
+private:
+ PBBAM_SHARED_PTR<internal::BamHeaderPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/BamHeader.inl"
+
+#endif // BAMHEADER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamReader.h
+/// \brief Defines the BamReader class.
+//
+// Author: Derek Barnett
+
+#ifndef BAMREADER_H
+#define BAMREADER_H
+
+#include "pbbam/BamFile.h"
+#include "pbbam/BamHeader.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/Config.h"
+#include "pbbam/GenomicInterval.h"
+
+#include <htslib/sam.h>
+#include <memory>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { struct BamReaderPrivate; }
+
+/// \brief The BamReader class provides basic read-access to a %BAM file.
+///
+/// The base-class implementation provides a sequential read-through of %BAM
+/// records. Derived classes may implement other access schemes (e.g. genomic
+/// region, PBI-enabled record filtering).
+///
+class PBBAM_EXPORT BamReader
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Opens %BAM file for reading.
+ ///
+ /// \param[in] fn %BAM filename
+ /// \throws std::runtime_error if failed to open
+ ///
+ explicit BamReader(const std::string& fn);
+
+ /// \brief Opens %BAM file for reading.
+ ///
+ /// \param[in] bamFile BamFile object
+ /// \throws std::runtime_error if failed to open
+ ///
+ explicit BamReader(const BamFile& bamFile);
+
+ /// \brief Opens %BAM file for reading.
+ ///
+ /// \param[in] bamFile BamFile object
+ /// \throws std::runtime_error if failed to open
+ ///
+ explicit BamReader(BamFile&& bamFile);
+
+ virtual ~BamReader(void);
+
+ /// \}
+
+public:
+ /// \name BAM File Attributes
+ /// \{
+
+ /// \returns the underlying BamFile
+ const BamFile& File(void) const;
+
+ /// \returns %BAM filename
+ std::string Filename(void) const;
+
+ /// \returns BamHeader object from %BAM header contents
+ const BamHeader& Header(void) const;
+
+ /// \}
+
+public:
+ /// \name BAM File I/O
+ /// \{
+
+ /// \brief Fetches the "next" %BAM record.
+ ///
+ /// Default implementation will read records until EOF. Derived readers may
+ /// use additional criteria to decide which record is "next" and when
+ /// reading is done.
+ ///
+ /// \param[out] record next BamRecord object. Should not be used if method
+ /// returns false.
+ ///
+ /// \returns true if record was read successfully. Returns false if EOF (or
+ /// end of iterator in derived readers). False is not an error,
+ /// it indicates "end of data".
+ ///
+ /// \throws std::runtime_error if failed to read from file (e.g. possible
+ /// truncated or corrupted file).
+ ///
+ bool GetNext(BamRecord& record);
+
+ /// \brief Seeks to virtual offset in %BAM.
+ ///
+ /// \note This is \b NOT a normal file offset, but the virtual offset used
+ /// in %BAM indexing.
+ ///
+ /// \throws std::runtime_error if failed to seek
+ ///
+ void VirtualSeek(int64_t virtualOffset);
+
+ /// \returns current (virtual) file position.
+ ///
+ /// \note This is \b NOT a normal file offset, but the virtual offset used
+ /// in %BAM indexing.
+ ///
+ int64_t VirtualTell(void) const;
+
+ /// \}
+
+protected:
+ /// \name BAM File I/O
+ /// \{
+
+ /// \brief Helper method for access to underlying BGZF stream pointer.
+ ///
+ /// Useful for derived readers' contact points with htslib methods.
+ ///
+ /// \returns BGZF stream pointer
+ ///
+ BGZF* Bgzf(void) const;
+
+ /// \brief Performs the actual raw read of the next record from the %BAM
+ /// file.
+ ///
+ /// Default implementation will read records, sequentially, until EOF.
+ /// Derived readers may use additional criteria to decide which record is
+ /// "next" and when reading is done.
+ ///
+ /// Return value should be equivalent to htslib's bam_read1():
+ /// >= 0 : normal
+ /// -1 : EOF (not an error)
+ /// < -1 : error
+ ///
+ /// \param[in] bgzf BGZF stream pointer
+ /// \param[out] b %BAM record pointer
+ /// \returns integer status code, see description
+ ///
+ virtual int ReadRawData(BGZF* bgzf, bam1_t* b);
+
+ /// \}
+
+private:
+ std::unique_ptr<internal::BamReaderPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // BAMREADER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecord.h
+/// \brief Defines the BamRecord class.
+//
+// Author: Derek Barnett
+
+#ifndef BAMRECORD_H
+#define BAMRECORD_H
+
+#include "pbbam/Accuracy.h"
+#include "pbbam/Frames.h"
+#include "pbbam/BamRecordImpl.h"
+#include "pbbam/BamHeader.h"
+#include "pbbam/ClipType.h"
+#include "pbbam/FrameEncodingType.h"
+#include "pbbam/LocalContextFlags.h"
+#include "pbbam/Orientation.h"
+#include "pbbam/PulseBehavior.h"
+#include "pbbam/ReadGroupInfo.h"
+#include "pbbam/RecordType.h"
+#include "pbbam/Strand.h"
+#include "pbbam/QualityValues.h"
+#include "pbbam/virtual/VirtualRegionType.h"
+#include "pbbam/ZmwType.h"
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal {
+
+class BamRecordMemory;
+class Pulse2BaseCache;
+
+} // namespace internal
+
+/// \brief The BamRecord class represents a %PacBio %BAM record.
+///
+/// %PacBio %BAM records are extensions of normal SAM/BAM records. Thus in
+/// addition to normal fields like bases, qualities, mapping coordinates, etc.,
+/// tags are used extensively to annotate records with additional
+/// PacBio-specific data.
+///
+/// Mapping and clipping APIs are provided as well to ensure that such
+/// operations "trickle down" to all data fields properly.
+///
+/// \sa https://samtools.github.io/hts-specs/SAMv1.pdf
+/// for more information on standard %BAM data, and
+/// https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/BAM.rst
+/// for more information on %PacBio %BAM fields.
+///
+class PBBAM_EXPORT BamRecord
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ BamRecord(void);
+ BamRecord(const BamHeader& header);
+ BamRecord(const BamRecordImpl& impl);
+ BamRecord(BamRecordImpl&& impl);
+ BamRecord(const BamRecord& other);
+ BamRecord(BamRecord&& other);
+ BamRecord& operator=(const BamRecord& other);
+ BamRecord& operator=(BamRecord&& other);
+ virtual ~BamRecord(void);
+
+ /// \}
+
+public:
+ /// \name General Data
+ /// \{
+
+ /// \returns this record's full name
+ /// \sa BamRecordImpl::Name
+ ///
+ std::string FullName(void) const;
+
+ /// \returns shared pointer to this record's associated BamHeader
+ BamHeader Header(void) const;
+
+ /// \returns ZMW hole number
+ /// \throws if missing zm tag & record name does not contain hole number
+ ///
+ int32_t HoleNumber(void) const;
+
+ /// \returns this record's LocalContextFlags
+ PacBio::BAM::LocalContextFlags LocalContextFlags(void) const;
+
+ /// \returns this record's movie name
+ std::string MovieName(void) const;
+
+ /// \returns "number of complete passes of the insert"
+ int32_t NumPasses(void) const;
+
+ /// \returns the record's query end position, or Sequence().length() if not
+ /// stored
+ /// \note QueryEnd is in polymerase read coordinates, NOT genomic
+ /// coordinates.
+ ///
+ Position QueryEnd(void) const;
+
+ /// \returns the record's query start position, or 0 if not stored
+ ///
+ /// \note QueryStart is in polymerase read coordinates, NOT genomic
+ /// coordinates.
+ ///
+ Position QueryStart(void) const;
+
+ /// \returns this record's expected read accuracy [0, 1000]
+ Accuracy ReadAccuracy(void) const;
+
+ /// \returns ReadGroupInfo object for this record
+ ReadGroupInfo ReadGroup(void) const;
+
+ /// \returns string ID of this record's read group
+ /// \sa ReadGroupInfo::Id
+ ///
+ std::string ReadGroupId(void) const;
+
+ /// \returns integer value for this record's read group ID
+ int32_t ReadGroupNumericId(void) const;
+
+ /// \returns this scrap record's scrap region type
+ VirtualRegionType ScrapRegionType(void) const;
+
+ /// \returns this scrap record's scrap ZMW type
+ ZmwType ScrapZmwType(void) const;
+
+ /// \returns this record's average signal-to-noise for each of A, C, G,
+ /// and T
+ ///
+ std::vector<float> SignalToNoise(void) const;
+
+ /// \returns this record's type
+ /// \sa RecordType
+ RecordType Type(void) const;
+
+ /// \}
+
+public:
+ /// \name Mapping Data
+ /// \{
+
+ /// \returns the record's aligned end position
+ ///
+ /// \note AlignedEnd is in polymerase read coordinates, NOT genomic
+ /// coordinates.
+ ///
+ Position AlignedEnd(void) const;
+
+ /// \returns the record's aligned start position
+ ///
+ /// \note AlignedStart is in polymerase read coordinates, NOT genomic
+ /// coordinates.
+ ///
+ Position AlignedStart(void) const;
+
+ /// \returns the record's strand as a Strand enum value
+ Strand AlignedStrand(void) const;
+
+ /// \returns the record's CIGAR data as a Cigar object
+ ///
+ /// \param[in] exciseAllClips if true, remove all clipping operations
+ /// (hard & soft) [default:false]
+ ///
+ Cigar CigarData(bool exciseAllClips = false) const;
+
+ /// \returns true if this record was mapped by aligner
+ bool IsMapped(void) const;
+
+ /// \returns this record's mapping quality. A value of 255 indicates
+ /// "unknown"
+ ///
+ uint8_t MapQuality(void) const;
+
+ /// \returns the number of deleted bases (relative to reference)
+ size_t NumDeletedBases(void) const;
+
+ /// \returns the number of inserted bases (relative to reference)
+ size_t NumInsertedBases(void) const;
+
+ /// \returns the number of matching bases (sum of '=' CIGAR op lengths)
+ size_t NumMatches(void) const;
+
+ /// \returns a pair containing NumMatches (first) and NumMismatches
+ /// (second)
+ ///
+ std::pair<size_t, size_t> NumMatchesAndMismatches(void) const;
+
+ /// \returns the number of mismatching bases (sum of 'X' CIGAR op lengths)
+ size_t NumMismatches(void) const;
+
+ /// \returns this record's reference ID, or -1 if unmapped.
+ ///
+ /// \note This is only a valid identifier within this %BAM file
+ ///
+ int32_t ReferenceId(void) const;
+
+ /// \returns this record's reference name.
+ ///
+ /// \throws an exception if unmapped record.
+ ///
+ std::string ReferenceName(void) const;
+
+ /// \returns the record's reference end position, or UnmappedPosition if
+ /// unmapped
+ ///
+ /// \note ReferenceEnd is in reference coordinates, NOT polymerase read
+ /// coordinates.
+ ///
+ Position ReferenceEnd(void) const;
+
+ /// \returns the record's reference start position, or UnmappedPosition if
+ /// unmapped
+ ///
+ /// \note ReferenceStart is in reference coordinates, NOT polymerase read
+ /// coordinates.
+ ///
+ Position ReferenceStart(void) const;
+
+ /// \}
+
+public:
+ /// \name Barcode Data
+ /// \{
+
+ /// \returns forward barcode id
+ ///
+ /// \throws std::runtime_error if barcode data is absent or malformed.
+ /// \sa HasBarcodes
+ ///
+ int16_t BarcodeForward(void) const;
+
+ /// \returns barcode call confidence (Phred-scaled posterior probability
+ /// of correct barcode call)
+ ///
+ /// \sa HasBarcodeQuality
+ ///
+ uint8_t BarcodeQuality(void) const;
+
+ /// \returns reverse barcode id
+ ///
+ /// \throws std::runtime_error if barcode data is absent or malformed.
+ /// \sa HasBarcodes
+ ///
+ int16_t BarcodeReverse(void) const;
+
+ /// \returns the forward and reverse barcode ids
+ ///
+ /// \throws std::runtime_error if barcode data is absent or malformed.
+ /// \sa HasBarcodes
+ ///
+ std::pair<int16_t,int16_t> Barcodes(void) const;
+
+ /// \}
+
+public:
+ /// \name Auxiliary Data Queries
+ /// \{
+
+ /// \returns true if this record has AltLabelQV data
+ bool HasAltLabelQV(void) const;
+
+ /// \returns true if this record has AltLabelTag data
+ bool HasAltLabelTag(void) const;
+
+ /// \returns true if this record has Barcode data
+ bool HasBarcodes(void) const;
+
+ /// \returns true if this record has BarcodeQuality data
+ bool HasBarcodeQuality(void) const;
+
+ /// \returns true if this record has DeletionQV data
+ bool HasDeletionQV(void) const;
+
+ /// \returns true if this record has DeletionTag data
+ bool HasDeletionTag(void) const;
+
+ /// \returns true if this record has a HoleNumber
+ bool HasHoleNumber(void) const;
+
+ /// \returns true if this record has InsertionQV data
+ bool HasInsertionQV(void) const;
+
+ /// \returns true if this record has IPD data
+ bool HasIPD(void) const;
+
+ /// \returns true if this record has LabelQV data
+ bool HasLabelQV(void) const;
+
+ /// \returns true if this record has LocalContextFlags (absent in CCS)
+ bool HasLocalContextFlags(void) const;
+
+ /// \returns true if this record has MergeQV data
+ bool HasMergeQV(void) const;
+
+ /// \returns true if this record has NumPasses data
+ bool HasNumPasses(void) const;
+
+ /// \returns true if this record has Pkmean data
+ bool HasPkmean(void) const;
+
+ /// \returns true if this record has Pkmid data
+ bool HasPkmid(void) const;
+
+ /// \returns true if this record has Pkmean2 data
+ bool HasPkmean2(void) const;
+
+ /// \returns true if this record has Pkmid2 data
+ bool HasPkmid2(void) const;
+
+ /// \returns true if this record has PreBaseFrames aka IPD data
+ bool HasPreBaseFrames(void) const;
+
+ /// \returns true if this record has PrePulseFrames data
+ bool HasPrePulseFrames(void) const;
+
+ /// \returns true if this record has PulseCall data
+ bool HasPulseCall(void) const;
+
+ /// \returns true if this record has PulseCallWidth data
+ bool HasPulseCallWidth(void) const;
+
+ /// \returns true if this record has PulseMergeQV data
+ bool HasPulseMergeQV(void) const;
+
+ /// \returns true if this record has PulseWidth data
+ bool HasPulseWidth(void) const;
+
+ /// \returns true if this record has ReadAccuracyTag data
+ bool HasReadAccuracy(void) const;
+
+ /// \returns true if this record has QueryEnd data
+ bool HasQueryEnd(void) const;
+
+ /// \returns true if this record has QueryStart data
+ bool HasQueryStart(void) const;
+
+ /// \returns true if this record has ScrapRegionType data (only in SCRAP)
+ bool HasScrapRegionType(void) const;
+
+ /// \returns true if this record has scrap ZMW type data (only in SCRAP)
+ bool HasScrapZmwType(void) const;
+
+ /// \returns true if this record has signal-to-noise data (absent in
+ /// POLYMERASE)
+ ///
+ bool HasSignalToNoise(void) const;
+
+ /// \returns true if this record has StartFrame data
+ bool HasStartFrame(void) const;
+
+ /// \returns true if this record has SubstitutionQV data
+ bool HasSubstitutionQV(void) const;
+
+ /// \returns true if this record has SubstitutionTag data
+ bool HasSubstitutionTag(void) const;
+
+ /// \}
+
+public:
+ /// \name Sequence & Tag Data
+ /// \{
+
+ /// \brief Fetches this record's AltLabelTag values ("pt" tag).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new gap chars will be '-' and padding chars will be '*'.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding may be inserted (see
+ /// note above) [default:false]
+ /// \param[in] exciseSoftClips NOTE(review): presumably removes soft-clipped
+ /// positions from query ends, as documented for
+ /// DeletionTag - confirm [default:false]
+ /// \param[in] pulseBehavior NOTE(review): semantics not documented here -
+ /// confirm [default:PulseBehavior::ALL]
+ ///
+ /// \returns AltLabelTags string
+ ///
+ std::string AltLabelTag(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false,
+ PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ /// \brief Fetches this record's DeletionTag values ("dt" tag).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new gap chars will be '-' and padding chars will be '*'.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns DeletionTag string
+ ///
+ std::string DeletionTag(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
+
+ /// \brief Fetches this record's DNA sequence (SEQ field).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new gap chars will be '-' and padding chars will be '*'.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns sequence string
+ ///
+ std::string Sequence(const Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
+
+ /// \brief Fetches this record's SubstitutionTag values ("st" tag).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new gap chars will be '-' and padding chars will be '*'.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns SubstitutionTags string
+ ///
+ std::string SubstitutionTag(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
+
+ /// \}
+
+public:
+ /// \name Quality Data
+ /// \{
+
+ /// \brief Fetches this record's AltLabelQV values ("pv" tag).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new QVs will have a value of 0.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding may be inserted (see
+ /// note above) [default:false]
+ /// \param[in] exciseSoftClips NOTE(review): presumably removes soft-clipped
+ /// positions from query ends, as documented for
+ /// DeletionQV - confirm [default:false]
+ /// \param[in] pulseBehavior NOTE(review): semantics not documented here -
+ /// confirm [default:PulseBehavior::ALL]
+ ///
+ /// \returns AltLabelQV as QualityValues object
+ ///
+ QualityValues AltLabelQV(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false,
+ PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ /// \brief Fetches this record's DeletionQV values ("dq" tag).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new QVs will have a value of 0.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns DeletionQV as QualityValues object
+ ///
+ QualityValues DeletionQV(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
+
+ /// \brief Fetches this record's InsertionQV values ("iq" tag).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new QVs will have a value of 0.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns InsertionQVs as QualityValues object
+ ///
+ QualityValues InsertionQV(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
+
+ /// \brief Fetches this record's LabelQV values ("pq" tag).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new QVs will have a value of 0.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding may be inserted (see
+ /// note above) [default:false]
+ /// \param[in] exciseSoftClips NOTE(review): presumably removes soft-clipped
+ /// positions from query ends, as documented for
+ /// DeletionQV - confirm [default:false]
+ /// \param[in] pulseBehavior NOTE(review): semantics not documented here -
+ /// confirm [default:PulseBehavior::ALL]
+ ///
+ /// \returns LabelQV as QualityValues object
+ ///
+ QualityValues LabelQV(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false,
+ PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ /// \brief Fetches this record's MergeQV values ("mq" tag).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new QVs will have a value of 0.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns MergeQV as QualityValues object
+ ///
+ QualityValues MergeQV(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
+
+ /// \brief Fetches this record's %BAM quality values (QUAL field).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new QVs will have a value of 0.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns %BAM qualities as QualityValues object
+ ///
+ QualityValues Qualities(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
+
+ /// \brief Fetches this record's SubstitutionQV values ("sq" tag).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new QVs will have a value of 0.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns SubstitutionQV as QualityValues object
+ ///
+ QualityValues SubstitutionQV(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
+
+ /// \}
+
+public:
+ /// \name Pulse Data
+ /// \{
+
+ /// \brief Fetches this record's IPD values ("ip" tag).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new frames will have a value of 0;
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns IPD as Frames object
+ ///
+ Frames IPD(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
+
+ /// \brief Fetches this record's IPD values ("ip" tag), but does not upscale.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \returns IPD as Frames object
+ ///
+ Frames IPDRaw(Orientation orientation = Orientation::NATIVE) const;
+
+ /// \brief Fetches this record's Pkmean values ("pa" tag).
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \returns Pkmean as vector<float> object
+ ///
+ std::vector<float> Pkmean(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false,
+ PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ /// \brief Fetches this record's Pkmid values ("pm" tag).
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \returns Pkmid as vector<float> object
+ ///
+ std::vector<float> Pkmid(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false,
+ PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ /// \brief Fetches this record's Pkmean2 values ("ps" tag).
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \returns Pkmean2 as vector<float> object
+ ///
+ std::vector<float> Pkmean2(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false,
+ PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ /// \brief Fetches this record's Pkmid2 values ("pi" tag).
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \returns Pkmid2 as vector<float> object
+ ///
+ std::vector<float> Pkmid2(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false,
+ PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ /// \brief Fetches this record's PreBaseFrames aka IPD values ("ip" tag).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new frames will have a value of 0;
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns IPD as Frames object
+ ///
+ Frames PreBaseFrames(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
+
+ /// \brief Fetches this record's PrePulseFrames values ("pd" tag).
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \returns PrePulseFrames as Frames object
+ ///
+ Frames PrePulseFrames(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false,
+ PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ /// \brief Fetches this record's PulseCall values ("pc" tag).
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \returns PulseCalls string
+ ///
+ std::string PulseCall(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false,
+ PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ /// \brief Fetches this record's PulseCallWidth values ("px" tag).
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \returns PulseCallWidth as Frames object
+ ///
+ Frames PulseCallWidth(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false,
+ PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ /// \brief Fetches this record's PulseMergeQV values ("pg" tag).
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \returns PulseMergeQV as QualityValues object
+ ///
+ QualityValues PulseMergeQV(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false,
+ PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ /// \brief Fetches this record's PulseWidth values ("pw" tag).
+ ///
+ /// \note If \p aligned is true, and gaps/padding need to be inserted, the
+ /// new frames will have a value of 0.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns PulseWidths as Frames object
+ ///
+ Frames PulseWidth(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
+
+ /// \brief Fetches this record's PulseWidth values ("pw" tag), but does not
+ /// upscale.
+ ///
+ /// \param[in] orientation Orientation of output.
+ /// \returns PulseWidth as Frames object
+ ///
+ Frames PulseWidthRaw(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false) const;
+
+ /// \brief Fetches this record's StartFrame values ("sf" tag).
+ ///
+ /// \param[in] orientation Orientation of output
+ ///
+ /// \returns StartFrame as uint32_t vector
+ ///
+ std::vector<uint32_t> StartFrame(Orientation orientation = Orientation::NATIVE,
+ bool aligned = false,
+ bool exciseSoftClips = false,
+ PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ /// \}
+
+public:
+ /// \name Low-Level Access & Operations
+ /// \{
+
+ /// \warning This method should be considered temporary and avoided as much
+ /// as possible. Direct access to the internal object is likely to
+ /// disappear as BamRecord interface matures.
+ ///
+ /// \returns const reference to underlying BamRecordImpl object
+ ///
+ const BamRecordImpl& Impl(void) const;
+
+ /// \warning This method should be considered temporary and avoided as much
+ /// as possible. Direct access to the internal object is likely to
+ /// disappear as BamRecord interface matures.
+ ///
+ /// \returns reference to underlying BamRecordImpl object
+ ///
+ BamRecordImpl& Impl(void);
+
+ /// \}
+
+public:
+ /// \name General Data
+ /// \{
+
+ /// \brief Sets this record's ZMW hole number.
+ ///
+ /// \param[in] holeNumber
+ /// \returns reference to this record
+ ///
+ BamRecord& HoleNumber(const int32_t holeNumber);
+
+ /// \brief Sets this record's local context flags
+ ///
+ /// \param[in] flags
+ /// \returns reference to this record
+ ///
+ BamRecord& LocalContextFlags(const PacBio::BAM::LocalContextFlags flags);
+
+ /// \brief Sets this record's "number of complete passes of the insert".
+ ///
+ /// \param[in] numPasses
+ /// \returns reference to this record
+ ///
+ BamRecord& NumPasses(const int32_t numPasses);
+
+ /// \brief Sets this record's query end position.
+ ///
+ /// \note Changing this will modify the name of non-CCS records.
+ ///
+ /// \param[in] pos
+ /// \returns reference to this record
+ ///
+ BamRecord& QueryEnd(const PacBio::BAM::Position pos);
+
+ /// \brief Sets this record's query start position.
+ ///
+ /// \note Changing this will modify the name of non-CCS records.
+ ///
+ /// \param[in] pos
+ /// \returns reference to this record
+ ///
+ BamRecord& QueryStart(const PacBio::BAM::Position pos);
+
+ /// \brief Sets this record's expected read accuracy [0, 1000]
+ ///
+ /// \param[in] accuracy
+ /// \returns reference to this record
+ ///
+ BamRecord& ReadAccuracy(const Accuracy& accuracy);
+
+ /// \brief Attaches this record to the provided read group, changing the
+ /// record name & 'RG' tag.
+ ///
+ /// \param[in] rg
+ /// \returns reference to this record
+ ///
+ BamRecord& ReadGroup(const ReadGroupInfo& rg);
+
+ /// \brief Attaches this record to the provided read group, changing the
+ /// record name & 'RG' tag.
+ ///
+ /// \param[in] id
+ /// \returns reference to this record
+ ///
+ BamRecord& ReadGroupId(const std::string& id);
+
+ /// \brief Sets this scrap record's ScrapRegionType
+ ///
+ /// \param[in] type
+ /// \returns reference to this record
+ ///
+ BamRecord& ScrapRegionType(const VirtualRegionType type);
+
+ /// \brief Sets this scrap record's ScrapRegionType
+ ///
+ /// \param[in] type character equivalent of VirtualRegionType
+ /// \returns reference to this record
+ ///
+ BamRecord& ScrapRegionType(const char type);
+
+ /// \brief Sets this scrap record's ScrapZmwType
+ ///
+ /// \param[in] type
+ /// \returns reference to this record
+ ///
+ BamRecord& ScrapZmwType(const ZmwType type);
+
+ /// \brief Sets this scrap record's ScrapZmwType
+ ///
+ /// \param[in] type character equivalent of ZmwType
+ /// \returns reference to this record
+ ///
+ BamRecord& ScrapZmwType(const char type);
+
+ /// \brief Sets this record's average signal-to-noise in each of A, C, G,
+ /// and T
+ ///
+ /// \param[in] snr average signal-to-noise of A, C, G, and T (in this order)
+ /// \returns reference to this record
+ ///
+ BamRecord& SignalToNoise(const std::vector<float>& snr);
+
+ /// \}
+
+public:
+ /// \name Barcode Data
+ /// \{
+
+ /// \brief Sets this record's barcode IDs ('bc' tag)
+ ///
+ /// \param[in] barcodeIds
+ /// \returns reference to this record
+ ///
+ BamRecord& Barcodes(const std::pair<int16_t, int16_t>& barcodeIds);
+
+ /// \brief Sets this record's barcode quality ('bq' tag)
+ ///
+ /// \param[in] quality Phred-scaled confidence call
+ /// \returns reference to this record
+ ///
+ BamRecord& BarcodeQuality(const uint8_t quality);
+
+ /// \}
+
+public:
+ /// \name Sequence & Tag Data
+ /// \{
+
+ /// \brief Sets this record's AltLabelTag values ("pt" tag).
+ ///
+ /// \param[in] tags
+ /// \returns reference to this record
+ ///
+ BamRecord& AltLabelTag(const std::string& tags);
+
+ /// \brief Sets this record's DeletionTag values ("dt" tag).
+ ///
+ /// \param[in] tags
+ /// \returns reference to this record
+ ///
+ BamRecord& DeletionTag(const std::string& tags);
+
+ /// \brief Sets this record's SubstitutionTag values ("st" tag).
+ ///
+ /// \param[in] tags
+ /// \returns reference to this record
+ ///
+ BamRecord& SubstitutionTag(const std::string& tags);
+
+ /// \}
+
+public:
+ /// \name Quality Data
+ /// \{
+
+ /// \brief Sets this record's AltLabelQV values ("pv" tag).
+ ///
+ /// \param[in] altLabelQVs
+ /// \returns reference to this record
+ ///
+ BamRecord& AltLabelQV(const QualityValues& altLabelQVs);
+
+ /// \brief Sets this record's DeletionQV values ("dq" tag).
+ ///
+ /// \param[in] deletionQVs
+ /// \returns reference to this record
+ ///
+ BamRecord& DeletionQV(const QualityValues& deletionQVs);
+
+ /// \brief Sets this record's InsertionQV values ("iq" tag).
+ ///
+ /// \param[in] insertionQVs
+ /// \returns reference to this record
+ ///
+ BamRecord& InsertionQV(const QualityValues& insertionQVs);
+
+ /// \brief Sets this record's LabelQV values ("pq" tag).
+ ///
+ /// \param[in] labelQVs
+ /// \returns reference to this record
+ ///
+ BamRecord& LabelQV(const QualityValues& labelQVs);
+
+ /// \brief Sets this record's MergeQV values ("mq" tag).
+ ///
+ /// \param[in] mergeQVs
+ /// \returns reference to this record
+ ///
+ BamRecord& MergeQV(const QualityValues& mergeQVs);
+
+ /// \brief Sets this record's SubstitutionQV values ("sq" tag).
+ ///
+ /// \param[in] substitutionQVs
+ /// \returns reference to this record
+ ///
+ BamRecord& SubstitutionQV(const QualityValues& substitutionQVs);
+
+ /// \}
+
+public:
+ /// \name Pulse Data
+ /// \{
+
+ /// \brief Sets this record's IPD values ("ip" tag).
+ ///
+ /// \param[in] frames
+ /// \param[in] encoding specify how to encode the data (8-bit lossy, or
+ /// 16-bit lossless)
+ /// \returns reference to this record
+ ///
+ BamRecord& IPD(const Frames& frames,
+ const FrameEncodingType encoding);
+
+ /// \brief Sets this record's Pkmean values ("pa" tag).
+ ///
+ /// \param[in] photons
+ /// \returns reference to this record
+ ///
+ BamRecord& Pkmean(const std::vector<float>& photons);
+
+ /// \brief Sets this record's Pkmean values ("pa" tag).
+ ///
+ /// \param[in] encodedPhotons
+ /// \returns reference to this record
+ ///
+ BamRecord& Pkmean(const std::vector<uint16_t>& encodedPhotons);
+
+ /// \brief Sets this record's Pkmid values ("pm" tag).
+ ///
+ /// \param[in] photons
+ /// \returns reference to this record
+ ///
+ BamRecord& Pkmid(const std::vector<float>& photons);
+
+ /// \brief Sets this record's Pkmid values ("pm" tag).
+ ///
+ /// \param[in] encodedPhotons
+ /// \returns reference to this record
+ ///
+ BamRecord& Pkmid(const std::vector<uint16_t>& encodedPhotons);
+
+ /// \brief Sets this record's Pkmean2 values ("ps" tag).
+ ///
+ /// \param[in] photons
+ /// \returns reference to this record
+ ///
+ BamRecord& Pkmean2(const std::vector<float>& photons);
+
+ /// \brief Sets this record's Pkmean2 values ("ps" tag).
+ ///
+ /// \param[in] encodedPhotons
+ /// \returns reference to this record
+ ///
+ BamRecord& Pkmean2(const std::vector<uint16_t>& encodedPhotons);
+
+ /// \brief Sets this record's Pkmid2 values ("pi" tag).
+ ///
+ /// \param[in] photons
+ /// \returns reference to this record
+ ///
+ BamRecord& Pkmid2(const std::vector<float>& photons);
+
+ /// \brief Sets this record's Pkmid2 values ("pi" tag).
+ ///
+ /// \param[in] encodedPhotons
+ /// \returns reference to this record
+ ///
+ BamRecord& Pkmid2(const std::vector<uint16_t>& encodedPhotons);
+
+ /// \brief Sets this record's PreBaseFrames aka IPD values ("ip" tag).
+ ///
+ /// \param[in] frames
+ /// \param[in] encoding specify how to encode the data (8-bit lossy, or
+ /// 16-bit lossless)
+ /// \returns reference to this record
+ ///
+ BamRecord& PreBaseFrames(const Frames& frames,
+ const FrameEncodingType encoding);
+
+ /// \brief Sets this record's PrePulseFrames values ("pd" tag).
+ ///
+ /// \param[in] frames
+ /// \param[in] encoding specify how to encode the data (8-bit lossy, or
+ /// 16-bit lossless)
+ /// \returns reference to this record
+ ///
+ BamRecord& PrePulseFrames(const Frames& frames,
+ const FrameEncodingType encoding);
+
+ /// \brief Sets this record's PulseCall values ("pc" tag).
+ ///
+ /// \param[in] tags
+ /// \returns reference to this record
+ ///
+ BamRecord& PulseCall(const std::string& tags);
+
+ /// \brief Sets this record's PulseCallWidth values ("px" tag).
+ ///
+ /// \param[in] frames
+ /// \param[in] encoding specify how to encode the data (8-bit lossy, or
+ /// 16-bit lossless)
+ /// \returns reference to this record
+ ///
+ BamRecord& PulseCallWidth(const Frames& frames,
+ const FrameEncodingType encoding);
+
+ /// \brief Sets this record's PulseMergeQV values ("pg" tag).
+ ///
+ /// \param[in] pulseMergeQVs
+ /// \returns reference to this record
+ ///
+ BamRecord& PulseMergeQV(const QualityValues& pulseMergeQVs);
+
+ /// \brief Sets this record's PulseWidth values ("pw" tag).
+ ///
+ /// \param[in] frames
+ /// \param[in] encoding specify how to encode the data (8-bit lossy, or
+ /// 16-bit lossless)
+ /// \returns reference to this record
+ ///
+ BamRecord& PulseWidth(const Frames& frames,
+ const FrameEncodingType encoding);
+
+ /// \brief Sets this record's StartFrame values ("sf" tag).
+ ///
+ /// \param[in] startFrame
+ /// \returns reference to this record
+ ///
+ BamRecord& StartFrame(const std::vector<uint32_t>& startFrame);
+
+ /// \}
+
+public:
+ /// \name Low-Level Access & Operations
+ /// \{
+
+ /// \brief Resets cached aligned start/end.
+ ///
+ /// \note This method should not be needed in most client code. It exists
+ /// primarily as a hook for internal reading loops (queries, index
+ /// build, etc.) It's essentially a workaround and will likely be
+ /// removed from the API.
+ ///
+ void ResetCachedPositions(void) const;
+
+ /// \brief Resets cached aligned start/end (non-const overload).
+ ///
+ /// \note This method should not be needed in most client code. It exists
+ /// primarily as a hook for internal reading loops (queries, index
+ /// build, etc.) It's essentially a workaround and will likely be
+ /// removed from the API.
+ ///
+ void ResetCachedPositions(void);
+
+ /// \brief Updates the record's name (BamRecord::FullName) to reflect
+ /// modifications to name components (movie name, ZMW hole number,
+ /// etc.)
+ ///
+ void UpdateName(void);
+
+ /// \}
+
+public:
+ /// \name Pulse Data
+ /// \{
+
+ /// NOTE(review): undocumented constant; presumably the scale factor between
+ /// float photon values and their uint16_t encoding - confirm against the
+ /// definition.
+ static const float photonFactor;
+
+ /// NOTE(review): undocumented; presumably converts float photon values into
+ /// the uint16_t form taken by the encodedPhotons setter overloads (Pkmean,
+ /// Pkmid, Pkmean2, Pkmid2) - confirm against the definition.
+ ///
+ /// \param[in] data float photon values
+ /// \returns vector of uint16_t values
+ ///
+ static std::vector<uint16_t> EncodePhotons(const std::vector<float>& data);
+
+ /// \}
+
+public:
+ /// \name Clipping & Mapping
+ /// \{
+
+ /// Creates a copied record from input, with clipping applied
+ static BamRecord Clipped(const BamRecord& input,
+ const ClipType clipType,
+ const PacBio::BAM::Position start,
+ const PacBio::BAM::Position end);
+
+ /// Creates a copied record from input, with mapping applied
+ static BamRecord Mapped(const BamRecord& input,
+ const int32_t referenceId,
+ const Position refStart,
+ const Strand strand,
+ const Cigar& cigar,
+ const uint8_t mappingQuality);
+
+ /// Applies clipping to this record
+ BamRecord& Clip(const ClipType clipType,
+ const PacBio::BAM::Position start,
+ const PacBio::BAM::Position end);
+
+ /// Creates a copied record from this one, with clipping applied
+ BamRecord Clipped(const ClipType clipType,
+ const PacBio::BAM::Position start,
+ const PacBio::BAM::Position end) const;
+
+ /// Applies mapping to this record
+ BamRecord& Map(const int32_t referenceId,
+ const Position refStart,
+ const Strand strand,
+ const Cigar& cigar,
+ const uint8_t mappingQuality);
+
+ /// Creates a copied record from this one, with mapping applied
+ BamRecord Mapped(const int32_t referenceId,
+ const Position refStart,
+ const Strand strand,
+ const Cigar& cigar,
+ const uint8_t mappingQuality) const;
+ /// \}
+
+private:
+ BamRecordImpl impl_;
+
+public:
+ /// public & mutable so that queries can directly set the header info,
+ /// even on a record that is const from client code's perspective
+ mutable BamHeader header_;
+
+private:
+ /// \internal
+ /// cached positions (mutable to allow lazy-calc in const methods)
+ mutable Position alignedStart_;
+ mutable Position alignedEnd_;
+
+private:
+ /// \internal
+ /// pulse to bam mapping cache
+ mutable std::unique_ptr<internal::Pulse2BaseCache> p2bCache_;
+
+private:
+ ///\internal
+ /// clipping methods
+
+ void ClipFields(const size_t clipPos, const size_t clipLength);
+ BamRecord& ClipToQuery(const PacBio::BAM::Position start,
+ const PacBio::BAM::Position end);
+ BamRecord& ClipToReference(const PacBio::BAM::Position start,
+ const PacBio::BAM::Position end);
+ BamRecord& ClipToReferenceForward(const PacBio::BAM::Position start,
+ const PacBio::BAM::Position end);
+ BamRecord& ClipToReferenceReverse(const PacBio::BAM::Position start,
+ const PacBio::BAM::Position end);
+
+private:
+ ///\internal
+ /// raw tag data fetching
+
+ // sequence tags
+ std::string FetchBasesRaw(const BamRecordTag tag) const;
+ std::string FetchBases(const BamRecordTag tag,
+ const Orientation orientation = Orientation::NATIVE,
+ const bool aligned = false,
+ const bool exciseSoftClips = false,
+ const PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ // frame tags
+ Frames FetchFramesRaw(const BamRecordTag tag) const;
+ Frames FetchFrames(const BamRecordTag tag,
+ const Orientation orientation = Orientation::NATIVE,
+ const bool aligned = false,
+ const bool exciseSoftClips = false,
+ const PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ // pulse tags
+ std::vector<float> FetchPhotonsRaw(const BamRecordTag tag) const;
+ std::vector<float> FetchPhotons(const BamRecordTag tag,
+ const Orientation orientation = Orientation::NATIVE,
+ const bool aligned = false,
+ const bool exciseSoftClips = false,
+ const PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ // QV tags
+ QualityValues FetchQualitiesRaw(const BamRecordTag tag) const;
+ QualityValues FetchQualities(const BamRecordTag tag,
+ const Orientation orientation = Orientation::NATIVE,
+ const bool aligned = false,
+ const bool exciseSoftClips = false,
+ const PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+ // UInt tags (e.g. start frame)
+ std::vector<uint32_t> FetchUIntsRaw(const BamRecordTag tag) const;
+ std::vector<uint32_t> FetchUInts(const BamRecordTag tag,
+ const Orientation orientation = Orientation::NATIVE,
+ const bool aligned = false,
+ const bool exciseSoftClips = false,
+ const PulseBehavior pulseBehavior = PulseBehavior::ALL) const;
+
+private:
+ ///\internal
+ /// marked const to allow calling from const methods
+ /// but updates our mutable cached values
+ void CalculateAlignedPositions(void) const;
+ void CalculatePulse2BaseCache(void) const;
+
+ friend class internal::BamRecordMemory;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/BamRecord.inl"
+
+#endif // BAMRECORD_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecordBuilder.h
+/// \brief Defines the BamRecordBuilder class.
+//
+// Author: Derek Barnett
+
+#ifndef BAMRECORDBUILDER_H
+#define BAMRECORDBUILDER_H
+
+#include "pbbam/BamRecord.h"
+#include "pbbam/BamHeader.h"
+#include "pbbam/Config.h"
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The BamRecordBuilder class provides a helper utility for building
+/// BamRecords.
+///
+/// This class provides a mechanism for building up %BAM data and
+/// lazy-encoding/constructing the actual BamRecord. Currently, the methods here
+/// really only support filling in the low-level SAM/BAM-style fields, not so
+/// much the PacBio-specific fields.
+///
+class PBBAM_EXPORT BamRecordBuilder
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty %BAM record builder.
+ BamRecordBuilder(void);
+
+ /// \brief Creates an empty %BAM record builder, with header info to apply
+ /// to built records.
+ ///
+ /// \param[in] header BamHeader object
+ ///
+ explicit BamRecordBuilder(const BamHeader& header);
+
+ /// \brief Creates record builder with initial record data.
+ ///
+ /// \param[in] prototype data from this record will be used to seed the
+ /// builder
+ ///
+ BamRecordBuilder(const BamRecord& prototype);
+
+ BamRecordBuilder(const BamRecordBuilder& other);
+ BamRecordBuilder(BamRecordBuilder&& other);
+ BamRecordBuilder& operator=(const BamRecordBuilder& other);
+ BamRecordBuilder& operator=(BamRecordBuilder&& other);
+ ~BamRecordBuilder(void);
+
+ /// \}
+
+public:
+ /// \name Record-Building
+ /// \{
+
+ /// \brief Builds a BamRecord from current builder attributes.
+ ///
+ /// \returns newly-built BamRecord object
+ ///
+ BamRecord Build(void) const;
+
+ /// \brief Replaces an existing BamRecord's data with current builder
+ /// attributes.
+ ///
+ /// \param[out] record resulting record
+ /// \returns true if successful
+ ///
+ bool BuildInPlace(BamRecord& record) const;
+
+ /// \brief Resets builder attributes to default values.
+ ///
+ void Reset(void);
+
+ /// \brief Resets builder attributes with \p prototype's data.
+ ///
+ /// \param[in] prototype
+ ///
+ void Reset(const BamRecord& prototype);
+
+ /// \brief Resets builder attributes with \p prototype's data.
+ ///
+ /// \param[in] prototype
+ ///
+ void Reset(BamRecord&& prototype);
+
+ /// \}
+
+public:
+
+ /// \name Core Attribute Setup
+ /// \{
+
+ /// \brief Sets the record's (BAI) index bin ID.
+ ///
+ /// \param[in] bin BAI index bin ID.
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Bin(const uint32_t bin);
+
+ /// \brief Sets this record's alignment flag, using a raw integer.
+ ///
+ /// \param[in] flag raw alignment flag
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Flag(const uint32_t flag);
+
+ /// \brief Sets this record's insert size.
+ ///
+ /// \param[in] iSize insert size
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& InsertSize(const int32_t iSize);
+
+ /// \brief Sets this record's map quality.
+ ///
+ /// \param[in] mapQual mapping quality - value of 255 indicates "unknown"
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& MapQuality(const uint8_t mapQual);
+
+ /// \brief Sets this record's mate's mapped position.
+ ///
+ /// \param[in] pos mapped position. A value of -1 indicates unmapped.
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& MatePosition(const int32_t pos);
+
+ /// \brief Sets this record's mate's mapped reference ID
+ ///
+ /// \param[in] id reference ID. A value of -1 indicates unmapped.
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& MateReferenceId(const int32_t id);
+
+ /// \brief Sets this record's mapped position.
+ ///
+ /// \param[in] pos mapped position. A value of -1 indicates unmapped.
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Position(const int32_t pos);
+
+ /// \brief Sets this record's mapped reference ID
+ ///
+ /// \param[in] id reference ID. A value of -1 indicates unmapped.
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& ReferenceId(const int32_t id);
+
+ /// \}
+
+public:
+ /// \name Alignment Flag Setup
+ /// \{
+
+ /// \brief Sets whether this record is a PCR/optical duplicate
+ BamRecordBuilder& SetDuplicate(bool ok);
+
+ /// \brief Sets whether this record failed quality controls
+ BamRecordBuilder& SetFailedQC(bool ok);
+
+ /// \brief Sets whether this record is the first mate of a pair.
+ BamRecordBuilder& SetFirstMate(bool ok);
+
+ /// \brief Sets whether this record was aligned.
+ BamRecordBuilder& SetMapped(bool ok);
+
+ /// \brief Sets whether this record's mate was aligned.
+ BamRecordBuilder& SetMateMapped(bool ok);
+
+ /// \brief Sets whether this record's mate mapped to reverse strand.
+ BamRecordBuilder& SetMateReverseStrand(bool ok);
+
+ /// \brief Sets whether this record came from paired-end sequencing.
+ BamRecordBuilder& SetPaired(bool ok);
+
+ /// \brief Sets whether this record is a read's primary alignment.
+ BamRecordBuilder& SetPrimaryAlignment(bool ok);
+
+ /// \brief Sets whether this record & its mate were properly mapped, per the
+ /// aligner.
+ ///
+ BamRecordBuilder& SetProperPair(bool ok);
+
+ /// \brief Sets whether this record mapped to reverse strand.
+ BamRecordBuilder& SetReverseStrand(bool ok);
+
+ /// \brief Sets whether this record is the second mate of a pair.
+ BamRecordBuilder& SetSecondMate(bool ok);
+
+ /// \brief Sets whether this record is a supplementary alignment.
+ BamRecordBuilder& SetSupplementaryAlignment(bool ok);
+
+ /// \}
+
+public:
+ /// \name Variable-Length Data Setup
+ /// \{
+
+ /// \brief Sets the record's CIGAR data.
+ ///
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Cigar(const PacBio::BAM::Cigar& cigar);
+
+ /// \brief Sets the record's CIGAR data.
+ ///
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Cigar(PacBio::BAM::Cigar&& cigar);
+
+ /// \brief Sets the record's name.
+ ///
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Name(const std::string& name);
+
+ /// \brief Sets the record's name.
+ ///
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Name(std::string&& name);
+
+ /// \brief Sets the record's qualities.
+ ///
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Qualities(const std::string& qualities);
+
+ /// \brief Sets the record's qualities.
+ ///
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Qualities(std::string&& qualities);
+
+ /// \brief Sets the record's sequence.
+ ///
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Sequence(const std::string& sequence);
+
+ /// \brief Sets the record's sequence.
+ ///
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Sequence(std::string&& sequence);
+
+ /// \brief Sets the record's tags.
+ ///
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Tags(const TagCollection& tags);
+
+ /// \brief Sets the record's tags.
+ ///
+ /// \returns reference to this builder
+ ///
+ BamRecordBuilder& Tags(TagCollection&& tags);
+
+ /// \}
+
+private:
+ BamHeader header_;
+ bam1_core_t core_;
+ std::string name_;
+ std::string sequence_;
+ std::string qualities_;
+ PacBio::BAM::Cigar cigar_;
+ TagCollection tags_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/BamRecordBuilder.inl"
+
+#endif // BAMRECORDBUILDER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecordImpl.h
+/// \brief Defines the BamRecordImpl class.
+//
+// Author: Derek Barnett
+
+#ifndef BAMRECORDIMPL_H
+#define BAMRECORDIMPL_H
+
+#include "pbbam/BamRecordTag.h"
+#include "pbbam/Cigar.h"
+#include "pbbam/Config.h"
+#include "pbbam/Position.h"
+#include "pbbam/QualityValues.h"
+#include "pbbam/TagCollection.h"
+#include <htslib/sam.h>
+#include <map>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { class BamRecordMemory; }
+
+/// \brief The BamRecordImpl class holds all data necessary for creating,
+/// querying or editing a generic %BAM record.
+///
+/// For PacBio-specific extensions and convenience methods, see BamRecord.
+///
+/// \note This class is mostly an internal implementation detail and will
+/// likely be removed from the public API in the future. Please use
+/// BamRecord as much as possible.
+///
+class PBBAM_EXPORT BamRecordImpl
+{
+public:
+
+ /// These flags describe the alignment status of the record.
+ enum AlignmentFlag
+ {
+ PAIRED = 0x0001 ///< Record comes from paired-end sequencing
+ , PROPER_PAIR = 0x0002 ///< Each mate of a pair was properly aligned ("proper" as determined by aligner)
+ , UNMAPPED = 0x0004 ///< Record was not mapped by aligner
+ , MATE_UNMAPPED = 0x0008 ///< Record's mate was not mapped by aligner
+ , REVERSE_STRAND = 0x0010 ///< Record was aligned to reverse strand (Sequence() is reverse-complemented)
+ , MATE_REVERSE_STRAND = 0x0020 ///< Record's mate was aligned to reverse strand (mate's Sequence() is reverse-complemented)
+ , MATE_1 = 0x0040 ///< Record is first mate of pair
+ , MATE_2 = 0x0080 ///< Record is second mate of pair
+ , SECONDARY = 0x0100 ///< Record is a secondary alignment
+ , FAILED_QC = 0x0200 ///< Record failed quality controls
+ , DUPLICATE = 0x0400 ///< Record is a PCR/optical duplicate
+ , SUPPLEMENTARY = 0x0800 ///< Record is a supplementary alignment
+ };
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ BamRecordImpl(void);
+ BamRecordImpl(const BamRecordImpl& other);
+ BamRecordImpl(BamRecordImpl&& other);
+ BamRecordImpl& operator=(const BamRecordImpl& other);
+ BamRecordImpl& operator=(BamRecordImpl&& other);
+ virtual ~BamRecordImpl(void);
+
+ /// \}
+
+public:
+ /// \name Core Data
+ /// \{
+
+ /// \returns this record's assigned (BAI) index bin ID.
+ uint32_t Bin(void) const;
+
+ /// \returns this record's alignment flag, in raw integer form.
+ uint32_t Flag(void) const;
+
+ /// \returns this record's insert size
+ int32_t InsertSize(void) const;
+
+ /// \returns this record's mapping quality. A value of 255 indicates "unknown"
+ uint8_t MapQuality(void) const;
+
+ /// \returns this record's mate's mapped position, or -1 if unmapped
+ PacBio::BAM::Position MatePosition(void) const;
+
+ /// \returns this record's mate's mapped reference ID, or -1 if unmapped
+ int32_t MateReferenceId(void) const;
+
+ /// \returns this record's mapped position, or -1 if unmapped
+ PacBio::BAM::Position Position(void) const;
+
+ /// \returns this record's mapped reference ID, or -1 if unmapped
+ int32_t ReferenceId(void) const;
+
+ /// Sets the record's (BAI) index bin ID.
+ ///
+ /// \param[in] bin BAI index bin ID.
+ /// \returns reference to this record
+ ///
+ BamRecordImpl& Bin(uint32_t bin);
+
+ /// Sets this record's alignment flag, using a raw integer.
+ ///
+ /// \param[in] flag raw alignment flag
+ /// \returns reference to this record
+ ///
+ BamRecordImpl& Flag(uint32_t flag);
+
+ /// Sets this record's insert size.
+ ///
+ /// \param[in] iSize insert size
+ /// \returns reference to this record
+ ///
+ BamRecordImpl& InsertSize(int32_t iSize);
+
+ /// Sets this record's map quality.
+ ///
+ /// \param[in] mapQual mapping quality - value of 255 indicates "unknown"
+ /// \returns reference to this record
+ ///
+ BamRecordImpl& MapQuality(uint8_t mapQual);
+
+ /// Sets this record's mate's mapped position.
+ ///
+ /// \param[in] pos mapped position. A value of -1 indicates unmapped.
+ /// \returns reference to this record
+ ///
+ BamRecordImpl& MatePosition(PacBio::BAM::Position pos);
+
+ /// Sets this record's mate's mapped reference ID
+ ///
+ /// \param[in] id reference ID. A value of -1 indicates unmapped.
+ /// \returns reference to this record
+ ///
+ BamRecordImpl& MateReferenceId(int32_t id);
+
+ /// Sets this record's mapped position.
+ ///
+ /// \param[in] pos mapped position. A value of -1 indicates unmapped.
+ /// \returns reference to this record
+ ///
+ BamRecordImpl& Position(PacBio::BAM::Position pos);
+
+ /// Sets this record's mapped reference ID
+ ///
+ /// \param[in] id reference ID. A value of -1 indicates unmapped.
+ /// \returns reference to this record
+ ///
+ BamRecordImpl& ReferenceId(int32_t id);
+
+ /// \}
+
+public:
+ /// \name Alignment Flags
+ /// \{
+
+ /// \returns true if this record is a PCR/optical duplicate
+ bool IsDuplicate(void) const;
+
+ /// \returns true if this record failed quality controls
+ bool IsFailedQC(void) const;
+
+ /// \returns true if this record is the first mate of a pair
+ bool IsFirstMate(void) const;
+
+ /// \returns true if this record was mapped by aligner
+ bool IsMapped(void) const;
+
+ /// \returns true if this record's mate was mapped by aligner
+ bool IsMateMapped(void) const;
+
+ /// \returns true if this record's mate was mapped to the reverse strand
+ bool IsMateReverseStrand(void) const;
+
+ /// \returns true if this record comes from paired-end sequencing
+ bool IsPaired(void) const;
+
+ /// \returns true if this record is a read's primary alignment
+ bool IsPrimaryAlignment(void) const;
+
+ /// \returns true if this record & its mate were properly aligned
+ bool IsProperPair(void) const;
+
+ /// \returns true if this record was mapped to the reverse strand
+ bool IsReverseStrand(void) const;
+
+ /// \returns true if this record is the second mate of a pair
+ bool IsSecondMate(void) const;
+
+ /// \returns true if this record is a supplementary alignment
+ bool IsSupplementaryAlignment(void) const;
+
+ /// Sets whether this record is a PCR/optical duplicate
+ BamRecordImpl& SetDuplicate(bool ok);
+
+ /// Sets whether this record failed quality controls
+ BamRecordImpl& SetFailedQC(bool ok);
+
+ /// Sets whether this record is the first mate of a pair.
+ BamRecordImpl& SetFirstMate(bool ok);
+
+ /// Sets whether this record was aligned.
+ BamRecordImpl& SetMapped(bool ok);
+
+ /// Sets whether this record's mate was aligned.
+ BamRecordImpl& SetMateMapped(bool ok);
+
+ /// Sets whether this record's mate mapped to reverse strand.
+ BamRecordImpl& SetMateReverseStrand(bool ok);
+
+ /// Sets whether this record came from paired-end sequencing.
+ BamRecordImpl& SetPaired(bool ok);
+
+ /// Sets whether this record is a read's primary alignment.
+ BamRecordImpl& SetPrimaryAlignment(bool ok);
+
+ /// Sets whether this record & its mate were properly mapped, per the aligner.
+ BamRecordImpl& SetProperPair(bool ok);
+
+ /// Sets whether this record mapped to reverse strand.
+ BamRecordImpl& SetReverseStrand(bool ok);
+
+ /// Sets whether this record is the second mate of a pair.
+ BamRecordImpl& SetSecondMate(bool ok);
+
+ /// Sets whether this record is a supplementary alignment.
+ BamRecordImpl& SetSupplementaryAlignment(bool ok);
+
+ /// \}
+
+public:
+ /// \name Variable-length Data (sequence, qualities, etc.)
+ /// \{
+
+ /// \returns the record's CIGAR data as a Cigar object
+ Cigar CigarData(void) const;
+
+ /// Sets the record's CIGAR data using a Cigar object
+ ///
+ /// \param[in] cigar PacBio::BAM::Cigar object
+ /// \returns reference to this record
+ ///
+ BamRecordImpl& CigarData(const Cigar& cigar);
+
+ /// Sets the record's CIGAR data using a CIGAR-formatted string.
+ ///
+ /// \param[in] cigarString CIGAR-formatted string
+ /// \returns reference to this record
+ ///
+ BamRecordImpl& CigarData(const std::string& cigarString);
+
+ // TODO: CIGAR iterator - Cigar only or here as well ??
+
+ /// \returns the record's query name
+ std::string Name(void) const;
+
+ /// Sets the record's "query name".
+ ///
+ /// \param name new name
+ /// \returns reference to this record
+ ///
+ BamRecordImpl& Name(const std::string& name);
+
+ /// \returns the record's quality values (phred-style ASCII)
+ ///
+ /// \note Usually Qualities().size() == Sequence().size(). However, in
+ /// some data sets, the quality values are not provided. In that
+ /// case, this method will return an empty container.
+ ///
+ QualityValues Qualities(void) const;
+
+ /// \returns the record's DNA sequence.
+ std::string Sequence(void) const;
+
+ size_t SequenceLength(void) const;
+
+ /// \brief Sets the record's DNA sequence and quality values
+ ///
+ /// This is an overloaded function. Sets the DNA sequence and quality
+ /// values, using the length of \p sequence.
+ ///
+ /// \note When using this overload (and \p qualities is non-empty), the
+ /// lengths of \p sequence and \p qualities \b must be equal.
+ ///
+ /// \todo How to handle mismatched lengths?
+ ///
+ /// \param[in] sequence std::string containing DNA sequence
+ /// \param[in] qualities std::string containing ASCII quality values
+ ///
+ /// \returns reference to this record.
+ ///
+ /// \sa SetSequenceAndQualities(const char* sequence,
+ /// const size_t sequenceLength, const char* qualities)
+ ///
+ BamRecordImpl& SetSequenceAndQualities(const std::string& sequence,
+ const std::string& qualities = std::string());
+
+ /// \brief Sets the record's DNA sequence and quality values.
+ ///
+ /// The \p sequence must consist of IUPAC nucleotide codes {=ACMGRSVTWYHKDBN}.
+ /// The \p qualities, if not empty, must consist of 'phred'-style ASCII
+ /// quality values. \p qualities may be an empty string or NULL pointer in
+ /// cases where there are no such data available.
+ ///
+ /// \param[in] sequence C-string containing DNA sequence
+ /// \param[in] sequenceLength length of DNA sequence
+ /// \param[in] qualities C-string containing 'phred-style' ASCII
+ /// quality values
+ ///
+ /// \note \p sequence does \b NOT have to be NULL-terminated. Length is
+ /// explicitly determined by the value of \p sequenceLength provided.
+ ///
+ /// \returns reference to this record.
+ ///
+ BamRecordImpl& SetSequenceAndQualities(const char* sequence,
+ const size_t sequenceLength,
+ const char* qualities = 0);
+
+ /// \brief Sets the record's DNA sequence and quality values.
+ ///
+ /// The \p encodedSequence should be preencoded/packed into the BAM binary
+ /// format. The \p qualities, if not empty, must consist of 'phred'-style
+ /// ASCII quality values. \p qualities may be an empty string or NULL
+ /// pointer in cases where there are no such data available.
+ ///
+ /// \param[in] encodedSequence C-string containing BAM-format-encoded
+ /// DNA sequence
+ /// \param[in] rawSequenceLength length of DNA sequence (not the encoded
+ /// length)
+ /// \param[in] qualities C-string containing 'phred-style' ASCII
+ /// quality values
+ ///
+ /// \note \p encodedSequence does \b NOT have to be NULL-terminated. Length
+ /// is explicitly determined by the value of \p rawSequenceLength
+ /// provided.
+ ///
+ /// \returns reference to this record.
+ ///
+ /// \sa SetSequenceAndQualities(const char* sequence,
+ /// const size_t sequenceLength, const char* qualities)
+ ///
+ BamRecordImpl& SetPreencodedSequenceAndQualities(const char* encodedSequence,
+ const size_t rawSequenceLength,
+ const char* qualities = 0);
+
+ /// \}
+
+public:
+ /// \name Tag Data
+ /// \{
+
+ /// \returns record's full tag data as a TagCollection object
+ TagCollection Tags(void) const;
+
+ /// \brief Sets the record's full tag data via a TagCollection object
+ ///
+ BamRecordImpl& Tags(const TagCollection& tags);
+
+ /// \brief Adds a new tag to this record.
+ ///
+ /// \param[in] tagName 2-character tag name.
+ /// \param[in] value Tag object that describes the type & value of data
+ /// to be added
+ ///
+ /// \note Any value that can be used to implicitly construct a Tag is valid.
+ /// \code
+ /// string s;
+ /// vector<uint32_t> v;
+ /// record.AddTag("XX", s); // will add a string-type tag
+ /// record.AddTag("YY", v); // will add a uint32-array-type tag
+ /// \endcode
+ ///
+ /// \returns true if tag was successfully added.
+ ///
+ bool AddTag(const std::string& tagName,
+ const Tag& value);
+
+ /// \brief Adds a new tag to this record.
+ ///
+ /// This is an overloaded method.
+ ///
+ /// \param[in] tag BamRecordTag enum
+ /// \param[in] value Tag object that describes the type & value of data
+ /// to be added
+ /// \returns true if tag was successfully added.
+ ///
+ bool AddTag(const BamRecordTag tag,
+ const Tag& value);
+
+ /// \brief Adds a new tag to this record, with an optional modifier.
+ ///
+ /// \param[in] tagName 2-character tag name.
+ /// \param[in] value Tag object that describes the type &
+ /// value of data to be added
+ /// \param[in] additionalModifier optional extra modifier (for explicit
+ /// modification of an otherwise const Tag)
+ ///
+ /// \note Any value that can be used to implicitly construct a Tag is valid.
+ /// \code
+ /// char c;
+ /// string h;
+ /// record.AddTag("XX", c, TagModifier::ASCII_CHAR); // will add a char-type tag
+ /// record.AddTag("YY", h, TagModifier::HEX_STRING); // will add a hex string-type tag
+ /// \endcode
+ ///
+ /// \returns true if tag was successfully added.
+ ///
+ bool AddTag(const std::string& tagName,
+ const Tag& value,
+ const TagModifier additionalModifier);
+
+ /// \brief Adds a new tag to this record, with an optional modifier.
+ ///
+ /// This is an overloaded method.
+ ///
+ /// \param[in] tag BamRecordTag enum.
+ /// \param[in] value Tag object that describes the type &
+ /// value of data to be added
+ /// \param[in] additionalModifier optional extra modifier (for explicit
+ /// modification of an otherwise const Tag)
+ ///
+ /// \returns true if tag was successfully added.
+ ///
+ bool AddTag(const BamRecordTag tag,
+ const Tag& value,
+ const TagModifier additionalModifier);
+
+ /// \brief Edits an existing tag on this record.
+ ///
+ /// \param[in] tagName 2-character tag name. Name must be present
+ /// (see HasTag)
+ /// \param[in] newValue Tag object that describes the type & value of
+ /// new data to be added
+ ///
+ /// \note Any value that can be used to implicitly construct a Tag is valid.
+ /// \code
+ /// string s;
+ /// vector<uint32_t> v;
+ /// record.EditTag("XX", s); // will overwrite tag XX with a string-type tag
+ /// record.EditTag("YY", v); // will overwrite tag YY with a uint32-array-type tag
+ /// \endcode
+ ///
+ /// \returns true if tag was successfully edited.
+ ///
+ bool EditTag(const std::string& tagName,
+ const Tag& newValue);
+
+ /// \brief Edits an existing tag on this record.
+ ///
+ /// This is an overloaded method.
+ ///
+ /// \param[in] tag BamRecordTag enum
+ /// \param[in] newValue Tag object that describes the type & value of
+ /// new data to be added
+ ///
+ /// \returns true if tag was successfully edited.
+ ///
+ bool EditTag(const BamRecordTag tag,
+ const Tag& newValue);
+
+ /// \brief Edits an existing tag on this record.
+ ///
+ /// \param[in] tagName 2-character tag name. Name must be
+ /// present (see HasTag)
+ /// \param[in] value Tag object that describes the type &
+ /// value of new data to be added
+ /// \param[in] additionalModifier optional extra modifier (for explicit
+ /// modification of an otherwise const Tag)
+ ///
+ /// \note Any value that can be used to implicitly construct a Tag is valid.
+ /// \code
+ /// char c;
+ /// string h;
+ /// record.EditTag("XX", c, TagModifier::ASCII_CHAR); // will overwrite tag XX with a char-type tag
+ /// record.EditTag("YY", h, TagModifier::HEX_STRING); // will overwrite tag YY with a hex string-type tag
+ /// \endcode
+ ///
+ /// \returns true if tag was successfully edited.
+ ///
+ bool EditTag(const std::string& tagName,
+ const Tag& value,
+ const TagModifier additionalModifier);
+
+ /// \brief Edits an existing tag on this record.
+ ///
+ /// This is an overloaded method.
+ ///
+ /// \param[in] tag BamRecordTag enum
+ /// \param[in] value Tag object that describes the type &
+ /// value of new data to be added
+ /// \param[in] additionalModifier optional extra modifier (for explicit
+ /// modification of an otherwise const Tag)
+ ///
+ /// \returns true if tag was successfully edited.
+ ///
+ bool EditTag(const BamRecordTag tag,
+ const Tag& value,
+ const TagModifier additionalModifier);
+
+
+ /// \returns true if a tag with this name is present in this record.
+ bool HasTag(const std::string& tagName) const;
+
+ /// \returns true if this tag is present in this record.
+ ///
+ /// This is an overloaded method.
+ ///
+ bool HasTag(const BamRecordTag tag) const;
+
+ /// \brief Removes an existing tag from this record.
+ ///
+ /// \param[in] tagName 2-character tag name.
+ ///
+ /// \returns true if tag was actually removed (i.e. false if tagName
+ /// previously unknown)
+ /// \sa HasTag
+ ///
+ bool RemoveTag(const std::string& tagName);
+
+ /// \brief Removes an existing tag from this record.
+ ///
+ /// This is an overloaded method.
+ ///
+ /// \param[in] tag BamRecordTag enum
+ ///
+ /// \returns true if tag was actually removed (i.e. false if tag was
+ /// previously unknown)
+ /// \sa HasTag
+ ///
+ bool RemoveTag(const BamRecordTag tag);
+
+ /// \brief Fetches a tag from this record.
+ ///
+ /// \param[in] tagName 2-character tag name.
+ ///
+ /// \returns Tag object for the requested name. If name is unknown, a
+ /// default constructed Tag is returned (Tag::IsNull() is true).
+ ///
+ Tag TagValue(const std::string& tagName) const;
+
+ /// \brief Fetches a tag from this record.
+ ///
+ /// This is an overloaded method
+ ///
+ /// \param[in] tag BamRecordTag enum
+ ///
+ /// \returns Tag object for the requested tag. If tag is unknown, a
+ /// default constructed Tag is returned (Tag::IsNull() is true).
+ ///
+ Tag TagValue(const BamRecordTag tag) const;
+
+ // change above to Tag();
+
+// template<typename T>
+// T TagValue(const std::string& tagName) const;
+
+
+ /// \}
+
+private:
+ // returns a BamRecordImpl object, with a deep copy of @rawData contents
+ static BamRecordImpl FromRawData(const PBBAM_SHARED_PTR<bam1_t>& rawData);
+
+ // internal memory setup/expand methods
+ void InitializeData(void);
+ void MaybeReallocData(void);
+ void UpdateTagMap(void) const; // allowed to be called from const methods
+ // (lazy update on request)
+
+ // internal tag helper methods
+ bool AddTagImpl(const std::string& tagName,
+ const Tag& value,
+ const TagModifier additionalModifier);
+ bool RemoveTagImpl(const std::string& tagName);
+ int TagOffset(const std::string& tagName) const;
+
+ // core seq/qual logic shared by the public API
+ BamRecordImpl& SetSequenceAndQualitiesInternal(const char* sequence,
+ const size_t sequenceLength,
+ const char* qualities,
+ bool isPreencoded);
+
+private:
+
+ // data members
+ PBBAM_SHARED_PTR<bam1_t> d_;
+ mutable std::map<uint16_t, int> tagOffsets_;
+
+ // friends
+ friend class internal::BamRecordMemory;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/BamRecordImpl.inl"
+
+#endif // BAMRECORDIMPL_H
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecordTag.h
+/// \brief Defines the BamRecordTag enum.
+//
+// Author: Derek Barnett
+
+#ifndef BAMRECORDTAG_H
+#define BAMRECORDTAG_H
+
+namespace PacBio {
+namespace BAM {
+
+enum class BamRecordTag // symbolic identifiers for BAM record tags
+{
+ ALT_LABEL_QV
+ , ALT_LABEL_TAG
+ , BARCODE_QUALITY
+ , BARCODES
+ , CONTEXT_FLAGS
+ , DELETION_QV
+ , DELETION_TAG
+ , HOLE_NUMBER
+ , INSERTION_QV
+ , IPD
+ , LABEL_QV
+ , MERGE_QV
+ , NUM_PASSES
+ , PKMEAN
+ , PKMEAN_2
+ , PKMID
+ , PKMID_2
+ , PRE_PULSE_FRAMES
+ , PULSE_CALL
+ , PULSE_CALL_WIDTH
+ , PULSE_MERGE_QV
+ , PULSE_WIDTH
+ , QUERY_END
+ , QUERY_START
+ , READ_ACCURACY
+ , READ_GROUP
+ , SCRAP_REGION_TYPE
+ , SCRAP_ZMW_TYPE
+ , SNR
+ , START_FRAME
+ , SUBSTITUTION_QV
+ , SUBSTITUTION_TAG
+
+ // Pseudo-tags: QUAL and SEQ are not tags per se, but they are given
+ // enumerators here (after the real tags) purely to simplify data
+ // fetching.
+ , QUAL
+ , SEQ
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // BAMRECORDTAG_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecordView.h
+/// \brief Defines the BamRecordView class.
+//
+// Author: Derek Barnett
+
+#ifndef BAMRECORDVIEW_H
+#define BAMRECORDVIEW_H
+
+#include "pbbam/BamRecord.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Provides a re-usable "view" onto a BamRecord
+///
+/// This class acts as a convenience wrapper for working with per-base BamRecord
+/// data. Most of these BamRecord methods take a list of parameters, to adjust
+/// how the underlying data are presented to client code. Often these parameters
+/// will be re-used for each BamRecord method call. Thus, to simplify such
+/// client code, a BamRecordView can be used to state those parameters once, and
+/// then simply request the desired fields.
+///
+/// \internal
+/// \todo Sync up method names with BamRecord
+/// \endinternal
+///
+class PBBAM_EXPORT BamRecordView
+{
+public:
+ /// \brief Constructs a view onto \p record using the supplied parameters.
+ ///
+ /// For frame or QV data, if \p aligned is true, a value of 0 (Accuracy or
+ /// QualityValue) will be used at each inserted or padded base location.
+ ///
+ /// \param[in] record BamRecord data source.
+ /// \param[in] orientation Orientation of output.
+ /// \param[in] aligned if true, gaps/padding will be inserted, per
+ /// Cigar info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ /// \param[in] pulseBehavior pulse behavior to use for this view (default: PulseBehavior::ALL)
+ BamRecordView(const BamRecord& record,
+ const Orientation orientation,
+ const bool aligned,
+ const bool exciseSoftClips,
+ const PulseBehavior pulseBehavior = PulseBehavior::ALL);
+
+public:
+
+ /// \returns BamRecord::AltLabelQV with this view's parameters applied
+ QualityValues AltLabelQVs(void) const;
+
+ /// \returns BamRecord::AltLabelTag with this view's parameters applied
+ std::string AltLabelTags(void) const;
+
+ /// \returns BamRecord::DeletionQV with this view's parameters applied
+ QualityValues DeletionQVs(void) const;
+
+ /// \returns BamRecord::DeletionTag with this view's parameters applied
+ std::string DeletionTags(void) const;
+
+ /// \returns BamRecord::InsertionQV with this view's parameters applied
+ QualityValues InsertionQVs(void) const;
+
+ /// \returns BamRecord::IPD with this view's parameters applied
+ Frames IPD(void) const;
+
+ /// \returns BamRecord::LabelQV with this view's parameters applied
+ QualityValues LabelQVs(void) const;
+
+ /// \returns BamRecord::MergeQV with this view's parameters applied
+ QualityValues MergeQVs(void) const;
+
+ /// \returns BamRecord::PulseMergeQV with this view's parameters applied
+ QualityValues PulseMergeQVs(void) const;
+
+ /// \returns BamRecord::Pkmean with this view's parameters applied
+ std::vector<float> Pkmean(void) const;
+
+ /// \returns BamRecord::Pkmid with this view's parameters applied
+ std::vector<float> Pkmid(void) const;
+
+ /// \returns BamRecord::Pkmean2 with this view's parameters applied
+ std::vector<float> Pkmean2(void) const;
+
+ /// \returns BamRecord::Pkmid2 with this view's parameters applied
+ std::vector<float> Pkmid2(void) const;
+
+ /// \returns BamRecord::PreBaseFrames with this view's parameters applied
+ Frames PrebaseFrames(void) const;
+
+ /// \returns BamRecord::PrePulseFrames with this view's parameters applied
+ Frames PrePulseFrames(void) const;
+
+ /// \returns BamRecord::PulseCalls with this view's parameters applied
+ std::string PulseCalls(void) const;
+
+ /// \returns BamRecord::PulseCallWidth with this view's parameters applied
+ Frames PulseCallWidth(void) const;
+
+ /// \returns BamRecord::PulseWidths with this view's parameters applied
+ Frames PulseWidths(void) const;
+
+ /// \returns BamRecord::Qualities with this view's parameters applied
+ QualityValues Qualities(void) const;
+
+ /// \returns BamRecord::Sequence with this view's parameters applied
+ std::string Sequence(void) const;
+
+ /// \returns BamRecord::StartFrame with this view's parameters applied
+ std::vector<uint32_t> StartFrames(void) const;
+
+ /// \returns BamRecord::SubstitutionQV with this view's parameters applied
+ QualityValues SubstitutionQVs(void) const;
+
+ /// \returns BamRecord::SubstitutionTag with this view's parameters applied
+ std::string SubstitutionTags(void) const;
+
+private:
+ const BamRecord& record_; // non-owning reference: the referenced BamRecord must outlive this view
+ Orientation orientation_;
+ bool aligned_;
+ bool exciseSoftClips_;
+ PulseBehavior pulseBehavior_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/BamRecordView.inl"
+
+#endif // BAMRECORDVIEW_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamTagCodec.h
+/// \brief Defines the BamTagCodec class.
+//
+// Author: Derek Barnett
+
+#ifndef BAMTAGCODEC_H
+#define BAMTAGCODEC_H
+
+#include "pbbam/Config.h"
+#include "pbbam/TagCollection.h"
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The BamTagCodec class provides binary encoding/decoding of %BAM tag
+/// data.
+///
+/// \note BamTagCodec is mostly an implementation and/or testing detail, and may
+/// be removed from the public API.
+///
+class PBBAM_EXPORT BamTagCodec
+{
+public:
+ /// \name Tag Collection Methods
+ /// \{
+
+ /// \brief Creates a TagCollection from raw BAM data.
+ ///
+ /// \param[in] data BAM-formatted (binary) tag data
+ /// \returns TagCollection containing the decoded tag data
+ ///
+ static TagCollection Decode(const std::vector<uint8_t>& data);
+
+ /// \brief Creates binary BAM data from a TagCollection.
+ ///
+ /// \param[in] tags TagCollection containing tag data
+ /// \returns vector of bytes (encoded BAM data)
+ ///
+ static std::vector<uint8_t> Encode(const PacBio::BAM::TagCollection& tags);
+
+ /// \}
+
+public:
+ /// \name Per-Tag Methods
+ /// \{
+
+ /// \brief Determines the SAM/BAM tag code for a Tag.
+ ///
+ /// \param[in] tag Tag object to check
+ /// \param[in] additionalModifier optional extra modifier (allows explicit
+ /// modification of an otherwise const Tag)
+ ///
+ /// \returns the single-character SAM/BAM type code for \p tag
+ ///
+ static uint8_t TagTypeCode(const PacBio::BAM::Tag& tag,
+ const TagModifier& additionalModifier = TagModifier::NONE);
+
+ /// \brief Encodes a single Tag's contents in %BAM binary format.
+ ///
+ /// \note This method does \b NOT encode the tag name & tag type. It does
+ /// include the element type for array-type tags.
+ ///
+ /// \param[in] tag Tag object containing data to encode
+ /// \param[in] additionalModifier optional extra modifier (allows explicit
+ /// modification of an otherwise const Tag)
+ ///
+ /// \returns vector of bytes (encoded BAM data)
+ ///
+ static std::vector<uint8_t> ToRawData(const PacBio::BAM::Tag& tag,
+ const TagModifier& additionalModifier = TagModifier::NONE);
+
+ /// \brief Creates a Tag object from binary BAM data.
+ ///
+ /// \param[in] rawData raw BAM bytes (assumed to be the result of
+ /// htslib's bam_aux_get())
+ ///
+ /// \returns resulting Tag object
+ ///
+ static PacBio::BAM::Tag FromRawData(uint8_t* rawData);
+
+ /// \}
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // BAMTAGCODEC_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamWriter.h
+/// \brief Defines the BamWriter class.
+//
+// Author: Derek Barnett
+
+#ifndef BAMWRITER_H
+#define BAMWRITER_H
+
+#include "pbbam/BamHeader.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/Config.h"
+#include "pbbam/IRecordWriter.h"
+#include <htslib/sam.h>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+class BamFile;
+
+namespace internal { class BamWriterPrivate; }
+
+/// \brief The BamWriter class provides a writing interface for creating
+/// new %BAM files.
+///
+/// \note The underlying buffered data may not be flushed to the file until the
+/// destructor is called. Trying to access the file (reading, stat-ing,
+/// indexing, etc.) before the BamWriter is destroyed yields undefined
+/// behavior. Enclose the BamWriter in some form of local scope (curly
+/// braces, a separate function, etc.) to ensure that its destructor is
+/// called before proceeding to read-based operations.
+///
+/// \code{.cpp}
+/// {
+/// BamWriter w(...);
+/// // write data
+/// }
+/// // now safe to access the new file
+/// \endcode
+///
+///
+class PBBAM_EXPORT BamWriter : public IRecordWriter
+{
+public:
+ /// \brief This enum allows you to control the compression level of the
+ /// output %BAM file.
+ ///
+ /// Values are equivalent to zlib compression levels. See its documentation
+ /// for more details: http://www.zlib.net/manual.html
+ ///
+ enum CompressionLevel
+ {
+ CompressionLevel_0 = 0
+ , CompressionLevel_1 = 1
+ , CompressionLevel_2 = 2
+ , CompressionLevel_3 = 3
+ , CompressionLevel_4 = 4
+ , CompressionLevel_5 = 5
+ , CompressionLevel_6 = 6
+ , CompressionLevel_7 = 7
+ , CompressionLevel_8 = 8
+ , CompressionLevel_9 = 9
+
+ , DefaultCompression = -1
+ , NoCompression = CompressionLevel_0
+ , FastCompression = CompressionLevel_1
+ , BestCompression = CompressionLevel_9
+ };
+
+ /// \brief This enum allows you to control whether BAI bin numbers are
+ /// calculated for output records.
+ ///
+ /// For most cases, the default behavior (ON) should be retained for maximum
+ /// compatibility with downstream tools (e.g. samtools index). Disabling bin
+ /// calculation should only be used if all records are known to never be
+ /// mapped, and even then only if profiling reveals the calculation to
+ /// affect extremely performance-sensitive, "critical paths".
+ ///
+ enum BinCalculationMode
+ {
+ BinCalculation_ON = 0
+ , BinCalculation_OFF
+ };
+
+public:
+
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Opens a %BAM file for writing & writes the header information.
+ ///
+ /// The error status will be set if either operation fails.
+ ///
+ /// \note Set \p filename to "-" for stdout.
+ ///
+ /// \param[in] filename path to output %BAM file
+ /// \param[in] header BamHeader object
+ /// \param[in] compressionLevel zlib compression level
+ /// \param[in] numThreads number of threads for compression. If set to
+ /// 0, BamWriter will attempt to determine a
+ /// reasonable estimate. If set to 1, this will
+ /// force single-threaded execution. No checks
+ /// are made against an upper limit.
+ ///
+ /// \param[in] binCalculationMode BAI bin calculation mode. The default
+ /// behavior will ensure proper bin numbers are provided for all
+ /// records written. This extra step may be turned off when bin
+ /// numbers are not needed. Though if in doubt, keep the default.
+ ///
+ /// \throws std::runtime_error if there was a problem opening the file for
+ /// writing or if an error occurred while writing the header
+ ///
+ BamWriter(const std::string& filename,
+ const BamHeader& header,
+ const BamWriter::CompressionLevel compressionLevel = BamWriter::DefaultCompression,
+ const size_t numThreads = 4,
+ const BinCalculationMode binCalculationMode = BamWriter::BinCalculation_ON);
+
+ /// Fully flushes all buffered data & closes file.
+ ~BamWriter(void);
+
+ /// \}
+
+public:
+
+ /// \name Data Writing & Resource Management
+ /// \{
+
+ /// \brief Try to flush any buffered data to file.
+ ///
+ /// \note The underlying implementation doesn't necessarily flush buffered
+ /// data immediately, especially in a multithreaded writer situation.
+ /// Let the BamWriter go out of scope to fully ensure flushing.
+ ///
+ /// \throws std::runtime_error if flush fails
+ ///
+ void TryFlush(void);
+
+ /// \brief Write a record to the output %BAM file.
+ ///
+ /// \param[in] record BamRecord object
+ ///
+ /// \throws std::runtime_error on failure to write
+ ///
+ void Write(const BamRecord& record);
+
+ /// \brief Write a record to the output %BAM file.
+ ///
+ /// \param[in] record BamRecord object
+ /// \param[out] vOffset BGZF virtual offset to start of \p record
+ ///
+ /// \throws std::runtime_error on failure to write
+ ///
+ void Write(const BamRecord& record, int64_t* vOffset);
+
+ /// \brief Write a record to the output %BAM file.
+ ///
+ /// \param[in] recordImpl BamRecordImpl object
+ ///
+ /// \throws std::runtime_error on failure to write
+ ///
+ void Write(const BamRecordImpl& recordImpl);
+
+ /// \}
+
+private:
+ std::unique_ptr<internal::BamWriterPrivate> d_; // owning pointer to the private implementation object
+ DISABLE_MOVE_AND_COPY(BamWriter);
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // BAMWRITER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BarcodeQuery.h
+/// \brief Defines the BarcodeQuery class.
+//
+// Author: Derek Barnett
+
+#ifndef BARCODEQUERY_H
+#define BARCODEQUERY_H
+
+#include "pbbam/Config.h"
+#include "pbbam/internal/QueryBase.h"
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The BarcodeQuery class provides iterable access to a DataSet's %BAM
+/// records, limiting results to those matching a particular barcode.
+///
+/// Example:
+/// \include code/BarcodeQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+/// Use BamFile::EnsurePacBioIndexExists before creating the query if one
+/// may not be present.
+///
+class PBBAM_EXPORT BarcodeQuery : public internal::IQuery
+{
+public:
+ /// \brief Creates a new BarcodeQuery, limiting record results to only those
+ /// annotated with a particular barcode ID.
+ ///
+ /// \param[in] barcode filtering criteria
+ /// \param[in] dataset input data source(s)
+ ///
+ /// \sa BamRecord::Barcodes
+ ///
+ /// \throws std::runtime_error on failure to open/read underlying %BAM or PBI
+ /// files.
+ ///
+ BarcodeQuery(const int16_t barcode, const DataSet& dataset);
+
+ ~BarcodeQuery(void);
+
+public:
+
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ /// NOTE(review): presumably fills \p r with the next matching record and returns false when none remain - confirm against the implementation.
+ bool GetNext(BamRecord& r);
+
+private:
+ struct BarcodeQueryPrivate;
+ std::unique_ptr<BarcodeQueryPrivate> d_; // private implementation; the struct is only forward-declared in this header
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // BARCODEQUERY_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Cigar.h
+/// \brief Defines the Cigar class.
+//
+// Author: Derek Barnett
+
+#ifndef CIGAR_H
+#define CIGAR_H
+
+#include "pbbam/CigarOperation.h"
+#include "pbbam/Config.h"
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The Cigar class represents the CIGAR string used to report alignment
+/// characteristics in SAM/BAM.
+///
+/// \note Use of the 'M' operator is forbidden in PacBio BAMs. See
+/// CigarOperationType description for more information.
+///
+/// \sa https://samtools.github.io/hts-specs/SAMv1.pdf for more information on CIGAR in general.
+///
+class PBBAM_EXPORT Cigar : public std::vector<CigarOperation>
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates a Cigar object from SAM/BAM string input
+ ///
+ /// \param [in] stdString SAM/BAM formatted CIGAR data
+ /// \returns a Cigar object representing the input data
+ ///
+ /// \note This method may be removed from the public API in the future,
+ /// as the constructor taking a std::string accomplishes the same end.
+ ///
+ static Cigar FromStdString(const std::string& stdString);
+
+ /// \brief Creates an empty Cigar.
+ Cigar(void);
+
+ /// \brief Creates a Cigar object from SAM/BAM string input
+ ///
+ /// \param [in] cigarString SAM/BAM formatted CIGAR data
+ ///
+ Cigar(const std::string& cigarString);
+
+ Cigar(const Cigar& other);
+ Cigar(Cigar&& other);
+ Cigar& operator=(const Cigar& other);
+ Cigar& operator=(Cigar&& other);
+ ~Cigar(void);
+
+ /// \}
+
+public:
+ /// \name Conversion Methods
+ /// \{
+
+ /// \brief Converts Cigar object data to a SAM/BAM formatted string
+ ///
+ /// \returns SAM/BAM formatted std::string
+ ///
+ std::string ToStdString(void) const;
+
+ /// \}
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/Cigar.inl"
+
+#endif // CIGAR_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file CigarOperation.h
+/// \brief Defines the CigarOperationType enum & CigarOperation class.
+//
+// Author: Derek Barnett
+
+#ifndef CIGAROPERATION_H
+#define CIGAROPERATION_H
+
+#include "pbbam/Config.h"
+#include <stdexcept>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Describes a CIGAR operation.
+///
+/// Bracketed character is the corresponding SAM/BAM character code.
+///
+/// \warning ALIGNMENT_MATCH ('M') is included in this enum to maintain
+/// consistency with htslib. However, as of PacBio BAM spec version
+/// 3.0b7, this CIGAR operation is \b forbidden. Any attempt to read or
+/// write a record containing this operation will trigger a
+/// std::runtime_error. SEQUENCE_MATCH('=') or SEQUENCE_MISMATCH('X')
+/// should be used instead.
+///
+enum class CigarOperationType
+{
+ UNKNOWN_OP = -1 ///< unknown/invalid CIGAR operator
+ , ALIGNMENT_MATCH = 0 ///< alignment match (can be a sequence match or mismatch) [M]
+ , INSERTION ///< insertion to the reference [I]
+ , DELETION ///< deletion from the reference [D]
+ , REFERENCE_SKIP ///< skipped region from the reference [N]
+ , SOFT_CLIP ///< soft clipping (clipped sequences present in SEQ) [S]
+ , HARD_CLIP = 5 ///< hard clipping (clipped sequences NOT present in SEQ) [H]
+ , PADDING ///< padding (silent deletion from padded reference) [P]
+ , SEQUENCE_MATCH ///< sequence match [=]
+ , SEQUENCE_MISMATCH ///< sequence mismatch [X]
+};
+
+/// \brief The CigarOperation class represents a single CIGAR operation
+/// (consisting of a type & length).
+///
+class PBBAM_EXPORT CigarOperation
+{
+public:
+
+ /// \name Operation Type Conversion Methods
+ /// \{
+
+ /// Converts a CigarOperationType enum value to its SAM/BAM character code.
+ ///
+ /// \param[in] type CigarOperationType value
+ /// \returns SAM/BAM character code
+ static char TypeToChar(const CigarOperationType type);
+
+ /// Converts a SAM/BAM character code to its CigarOperationType enum value.
+ ///
+ /// \param[in] c SAM/BAM character code
+ /// \returns CigarOperationType value
+ static CigarOperationType CharToType(const char c);
+
+ /// \}
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ CigarOperation(void);
+ CigarOperation(char c, uint32_t length);
+ CigarOperation(CigarOperationType op, uint32_t length);
+ CigarOperation(const CigarOperation& other);
+ CigarOperation(CigarOperation&& other);
+ CigarOperation& operator=(const CigarOperation& other);
+ CigarOperation& operator=(CigarOperation&& other);
+ ~CigarOperation(void);
+
+ /// \}
+
+public:
+
+ /// \returns operation type as SAM/BAM char code
+ inline char Char(void) const;
+
+ /// \returns operation length
+ inline uint32_t Length(void) const;
+
+ /// \returns operation type as CigarOperationType enum value
+ inline CigarOperationType Type(void) const;
+
+ /// \}
+
+public:
+ /// \name Attributes
+ /// \{
+
+ /// Sets this operation type.
+ ///
+ /// \param[in] opChar SAM/BAM character code
+ /// \returns reference to this operation
+ inline CigarOperation& Char(const char opChar);
+
+ /// Sets this operation length.
+ ///
+ /// \param[in] length new operation length
+ /// \returns reference to this operation
+ inline CigarOperation& Length(const uint32_t length);
+
+ /// Sets this operation type.
+ ///
+ /// \param[in] opType CigarOperationType value
+ /// \returns reference to this operation
+ inline CigarOperation& Type(const CigarOperationType opType);
+
+ /// \}
+
+public:
+ /// \name Comparison Operators
+ /// \{
+
+ /// \returns true if both CIGAR operation type & length match
+ inline bool operator==(const CigarOperation& other) const;
+
+ /// \returns true if either CIGAR operation type or length differ
+ inline bool operator!=(const CigarOperation& other) const;
+
+ /// \}
+
+private:
+ CigarOperationType type_;
+ uint32_t length_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/CigarOperation.inl"
+
+#endif // CIGAROPERATION_H
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ClipType.h
+/// \brief Defines the ClipType enum.
+//
+// Author: Derek Barnett
+
+#ifndef CLIPTYPE_H
+#define CLIPTYPE_H
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Selects how BamRecord clipping operations interpret the positions
+/// they are given.
+///
+/// Methods like BamRecord::Clip accept Position parameters - which may be in
+/// either polymerase or reference coordinates. This enum is passed as a flag
+/// to indicate which of the two applies.
+///
+enum class ClipType
+{
+ /// No clipping will be performed.
+ CLIP_NONE,
+ /// Clipping positions are in polymerase coordinates.
+ CLIP_TO_QUERY,
+ /// Clipping positions are in genomic coordinates.
+ CLIP_TO_REFERENCE
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // CLIPTYPE_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Compare.h
+/// \brief Defines the Compare class & a number of function objects for
+/// comparing BamRecords.
+//
+// Author: Derek Barnett
+
+#ifndef COMPARE_H
+#define COMPARE_H
+
+#include "pbbam/BamRecord.h"
+#include <functional>
+#include <string>
+#include <utility>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The Compare class provides utilities for sorting collections of
+/// BamRecords.
+///
+/// \note The functors provided here currently only support std::less<T>
+/// comparisons (i.e. sorting by ascending value).
+///
+/// \include code/Compare.txt
+///
+struct PBBAM_EXPORT Compare
+{
+public:
+
+ /// \name Comparison Type
+ /// \{
+
+ /// \brief Enumerates the supported comparison types
+ /// { ==, !=, <, <=, >, >=, & (contains), ~ (not contains) }.
+ ///
+ /// Values are consecutive, starting at 0 for EQUAL.
+ ///
+ enum Type
+ {
+ EQUAL = 0, ///< ==
+ NOT_EQUAL, ///< !=
+ LESS_THAN, ///< <
+ LESS_THAN_EQUAL, ///< <=
+ GREATER_THAN, ///< >
+ GREATER_THAN_EQUAL, ///< >=
+ CONTAINS, ///< & (contains)
+ NOT_CONTAINS ///< ~ (not contains)
+ };
+
+ /// \brief Convert operator string to Compare::Type.
+ ///
+ /// \include code/Compare_TypeFromOperator.txt
+ ///
+ /// \param[in] opString operator string. Can be C++-style operators
+ /// ("==", "!=", "<=", etc) or alpha equivalents
+ /// ("eq", "ne", "lte", etc).
+ ///
+ /// \returns comparison type from an operator string
+ /// \throws std::runtime_error if cannot convert opString to Compare::Type
+ /// \sa Compare::TypeToOperator
+ ///
+ static Compare::Type TypeFromOperator(const std::string& opString);
+
+ /// \brief Convert a Compare::Type to printable enum name.
+ ///
+ /// \include code/Compare_TypeToName.txt
+ ///
+ /// \param[in] type Compare::Type to convert
+ /// \returns the printable name for a Compare::Type enum value
+ /// \throws std::runtime_error on unknown Compare::Type
+ ///
+ static std::string TypeToName(const Compare::Type& type);
+
+ /// \brief Convert a Compare::Type to printable operator.
+ ///
+ /// \param[in] type Compare::Type to convert
+ /// \param[in] asAlpha (optional) flag to print using alpha equivalents
+ /// e.g. "lte" rather than "<="
+ /// \returns the printable operator string
+ /// \throws std::runtime_error on unknown Compare::Type
+ ///
+ static std::string TypeToOperator(const Compare::Type& type,
+ bool asAlpha = false);
+
+ /// \}
+
+public:
+
+ /// \name Comparison Function Objects
+ /// \{
+
+ /// %Base class for all BamRecord compare functors.
+ ///
+ /// Mostly used for method signatures that can accept any comparator.
+ ///
+ /// Custom comparators may be used by inheriting from this class.
+ ///
+ struct Base : public std::function<bool(const BamRecord&, const BamRecord&)> { };
+
+private:
+ /// \internal
+ ///
+ /// Supplies the pointer-to-member alias that MemberFunctionBase needs in
+ /// its own template parameter list. Naming the type here keeps that
+ /// signature readable.
+ ///
+ template<typename ValueType>
+ struct MemberFunctionBaseHelper : public Compare::Base
+ {
+ /// Pointer to a const, zero-argument BamRecord member function that
+ /// returns ValueType.
+ using MemberFnType = ValueType (BamRecord::*)() const;
+ };
+
+public:
+ /// \brief %Base class for all BamRecord compare functors that take a
+ /// BamRecord function pointer and compare on its return type.
+ ///
+ /// Derived comparators usually need only declare the return value &
+ /// function pointer in the template signature. This class implements the
+ /// basic method-calling machinery.
+ ///
+ /// Custom comparators will work for any BamRecord member function that does
+ /// not take any input parameters.
+ ///
+ template<typename ValueType,
+ typename MemberFunctionBaseHelper<ValueType>::MemberFnType fn,
+ typename CompareType = std::less<ValueType> >
+ struct MemberFunctionBase : public Compare::MemberFunctionBaseHelper<ValueType>
+ {
+ bool operator()(const BamRecord& lhs, const BamRecord& rhs) const;
+ };
+
+public:
+
+ /// \brief Compares on BamRecord::AlignedEnd.
+ ///
+ /// Example:
+ /// \include code/Compare_AlignedEnd.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct AlignedEnd : public MemberFunctionBase<Position, &BamRecord::AlignedEnd> { };
+
+ /// \brief Compares on BamRecord::AlignedStart.
+ ///
+ /// Example:
+ /// \include code/Compare_AlignedStart.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct AlignedStart : public MemberFunctionBase<Position, &BamRecord::AlignedStart> { };
+
+ /// \brief Compares on BamRecord::AlignedStrand
+ ///
+ /// Example:
+ /// \include code/Compare_AlignedStrand.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct AlignedStrand : public MemberFunctionBase<Strand, &BamRecord::AlignedStrand> { };
+
+ /// \brief Compares on BamRecord::BarcodeForward.
+ ///
+ /// Example:
+ /// \include code/Compare_BarcodeForward.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct BarcodeForward : public MemberFunctionBase<int16_t, &BamRecord::BarcodeForward> { };
+
+ /// \brief Compares on BamRecord::BarcodeQuality.
+ ///
+ /// Example:
+ /// \include code/Compare_BarcodeQuality.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct BarcodeQuality : public MemberFunctionBase<uint8_t, &BamRecord::BarcodeQuality> { };
+
+ /// \brief Compares on BamRecord::BarcodeReverse.
+ ///
+ /// Example:
+ /// \include code/Compare_BarcodeReverse.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct BarcodeReverse: public MemberFunctionBase<int16_t, &BamRecord::BarcodeReverse> { };
+
+ /// \brief Compares on BamRecord::FullName.
+ ///
+ /// Example:
+ /// \include code/Compare_FullName.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct FullName : public MemberFunctionBase<std::string, &BamRecord::FullName> { };
+
+ /// \brief Compares on BamRecord::LocalContextFlags.
+ ///
+ /// Example:
+ /// \include code/Compare_LocalContextFlag.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct LocalContextFlag : public MemberFunctionBase<LocalContextFlags, &BamRecord::LocalContextFlags> { };
+
+ /// \brief Compares on BamRecord::MapQuality.
+ ///
+ /// Example:
+ /// \include code/Compare_MapQuality.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct MapQuality : public MemberFunctionBase<uint8_t, &BamRecord::MapQuality> { };
+
+ /// \brief Compares on BamRecord::MovieName.
+ ///
+ /// Example:
+ /// \include code/Compare_MovieName.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct MovieName : public MemberFunctionBase<std::string, &BamRecord::MovieName> { };
+
+ /// \brief Provides an operator() that is essentially a no-op for
+ /// comparing/sorting.
+ ///
+ /// If used in a sorting operation, then no change will occur.
+ ///
+ struct None : public Compare::Base
+ {
+ bool operator()(const BamRecord&, const BamRecord&) const;
+ };
+
+ /// \brief Compares on BamRecord::NumDeletedBases.
+ ///
+ /// Example:
+ /// \include code/Compare_NumDeletedBases.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct NumDeletedBases : public MemberFunctionBase<size_t, &BamRecord::NumDeletedBases> { };
+
+ /// \brief Compares on BamRecord::NumInsertedBases.
+ ///
+ /// Example:
+ /// \include code/Compare_NumInsertedBases.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct NumInsertedBases : public MemberFunctionBase<size_t, &BamRecord::NumInsertedBases> { };
+
+ /// \brief Compares on BamRecord::NumMatches.
+ ///
+ /// Example:
+ /// \include code/Compare_NumMatches.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct NumMatches : public MemberFunctionBase<size_t, &BamRecord::NumMatches> { };
+
+ /// \brief Compares on BamRecord::NumMismatches.
+ ///
+ /// Example:
+ /// \include code/Compare_NumMismatches.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct NumMismatches : public MemberFunctionBase<size_t, &BamRecord::NumMismatches> { };
+
+ /// \brief Compares on BamRecord::QueryEnd.
+ ///
+ /// Example:
+ /// \include code/Compare_QueryEnd.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct QueryEnd : public MemberFunctionBase<Position, &BamRecord::QueryEnd> { };
+
+ /// \brief Compares on BamRecord::QueryStart.
+ ///
+ /// Example:
+ /// \include code/Compare_QueryStart.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct QueryStart : public MemberFunctionBase<Position, &BamRecord::QueryStart> { };
+
+ /// \brief Compares on BamRecord::ReadAccuracy.
+ ///
+ /// Example:
+ /// \include code/Compare_ReadAccuracy.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct ReadAccuracy : public MemberFunctionBase<Accuracy, &BamRecord::ReadAccuracy> { };
+
+ /// \brief Compares on BamRecord::ReadGroupId.
+ ///
+ /// \note Even though the ReadGroupId string contains hex values, it is
+ /// still just a std::string. Comparisons will use lexical, not
+ /// numeric ordering. If numeric ordering is desired, use
+ /// Compare::ReadGroupNumericId instead.
+ ///
+ /// Example:
+ /// \include code/Compare_ReadGroupId.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct ReadGroupId : public MemberFunctionBase<std::string, &BamRecord::ReadGroupId> { };
+
+ /// \brief Compares on BamRecord::ReadGroupNumericId.
+ ///
+ /// Example:
+ /// \include code/Compare_ReadGroupNumericId.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct ReadGroupNumericId : public MemberFunctionBase<int32_t, &BamRecord::ReadGroupNumericId> { };
+
+ /// \brief Compares on BamRecord::ReferenceEnd.
+ ///
+ /// Example:
+ /// \include code/Compare_ReferenceEnd.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct ReferenceEnd : public MemberFunctionBase<Position, &BamRecord::ReferenceEnd> { };
+
+ /// \brief Compares on BamRecord::ReferenceId.
+ ///
+ /// Example:
+ /// \include code/Compare_ReferenceId.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct ReferenceId : public MemberFunctionBase<int32_t, &BamRecord::ReferenceId> { };
+
+ /// \brief Compares on BamRecord::ReferenceName.
+ ///
+ /// Example:
+ /// \include code/Compare_ReferenceName.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct ReferenceName : public MemberFunctionBase<std::string, &BamRecord::ReferenceName> { };
+
+ /// \brief Compares on BamRecord::ReferenceStart.
+ ///
+ /// Example:
+ /// \include code/Compare_ReferenceStart.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct ReferenceStart : public MemberFunctionBase<Position, &BamRecord::ReferenceStart> { };
+
+ /// \brief Compares on BamRecord::HoleNumber.
+ ///
+ /// Example:
+ /// \include code/Compare_Zmw.txt
+ ///
+ /// \note Currently only supports std::less<T> comparisons (i.e. sorting by
+ /// ascending value).
+ ///
+ struct Zmw : public MemberFunctionBase<int32_t, &BamRecord::HoleNumber> { };
+
+ /// \}
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/Compare.inl"
+
+#endif // COMPARE_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file CompositeBamReader.h
+/// \brief Defines the composite BAM readers, for working with multiple input
+/// files.
+//
+// Author: Derek Barnett
+
+#ifndef COMPOSITEBAMREADER_H
+#define COMPOSITEBAMREADER_H
+
+#include "pbbam/BaiIndexedBamReader.h"
+#include "pbbam/BamFile.h"
+#include "pbbam/BamHeader.h"
+#include "pbbam/BamReader.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/Config.h"
+#include "pbbam/DataSet.h"
+#include "pbbam/GenomicInterval.h"
+#include "pbbam/PbiIndexedBamReader.h"
+#include <deque>
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal {
+
+/// \internal
+/// \brief The CompositeMergeItem class provides a helper struct for composite
+/// readers, containing a single-file reader and its "next" record.
+///
+/// Only move construction and move assignment are declared, and the struct
+/// holds a std::unique_ptr, so items can be moved but not copied.
+///
+struct CompositeMergeItem
+{
+public:
+ /// Reader for a single input file, owned by this item.
+ std::unique_ptr<BamReader> reader;
+ /// The "next" record that goes with \c reader.
+ BamRecord record;
+
+public:
+ /// Creates an item from a reader only.
+ /// NOTE(review): presumably \c record starts out default-constructed -
+ /// confirm against the out-of-line definition.
+ CompositeMergeItem(std::unique_ptr<BamReader>&& rdr);
+ /// Creates an item from a reader and its "next" record.
+ CompositeMergeItem(std::unique_ptr<BamReader>&& rdr, BamRecord&& rec);
+ /// Move constructor.
+ CompositeMergeItem(CompositeMergeItem&& other);
+ /// Move assignment operator.
+ CompositeMergeItem& operator=(CompositeMergeItem&& other);
+ /// Destructor.
+ ~CompositeMergeItem(void);
+};
+
+/// \internal
+/// \brief The CompositeMergeItemSorter class provides a helper function object
+/// for ordering composite reader results.
+///
+/// Essentially just extracts a BamRecord from its parent CompositeMergeItem for
+/// further checks.
+///
+/// \tparam CompareType comparator type; presumably one of the Compare
+/// functors applied to the extracted records - confirm
+/// against the out-of-line definition
+///
+template<typename CompareType>
+struct CompositeMergeItemSorter : public std::function<bool(const CompositeMergeItem&,
+ const CompositeMergeItem&)>
+{
+ // NOTE(review): this call operator is not const-qualified, so it cannot be
+ // invoked through a const CompositeMergeItemSorter.
+ bool operator()(const CompositeMergeItem& lhs,
+ const CompositeMergeItem& rhs);
+};
+
+} // namespace internal
+
+/// \brief The GenomicIntervalCompositeBamReader class provides read access to
+/// multiple %BAM files, limiting results to a genomic region.
+///
+/// Requires a ".bai" file for each input %BAM file.
+///
+/// Results will be returned in order of genomic coordinate (first by reference
+/// ID, then by position).
+///
+class PBBAM_EXPORT GenomicIntervalCompositeBamReader
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// Creates a reader over \p bamFiles, limited to \p interval.
+ GenomicIntervalCompositeBamReader(const GenomicInterval& interval,
+ const std::vector<BamFile>& bamFiles);
+ /// Same as above, taking the file list as an rvalue.
+ GenomicIntervalCompositeBamReader(const GenomicInterval& interval,
+ std::vector<BamFile>&& bamFiles);
+ /// Creates a reader limited to \p interval, with input taken from
+ /// \p dataset. NOTE(review): presumably the dataset's %BAM resources are
+ /// used as the input files - confirm against the definition.
+ GenomicIntervalCompositeBamReader(const GenomicInterval& interval,
+ const DataSet& dataset);
+
+ /// \}
+
+public:
+ /// \name Data Access
+ /// \{
+
+ /// Fetches next BAM record in the interval specified, storing in \p record
+ ///
+ /// \param[out] record
+ /// \returns true on success, false if no more data available.
+ ///
+ bool GetNext(BamRecord& record);
+
+ /// Sets a new genomic interval of interest.
+ ///
+ /// \param[in] interval new genomic interval
+ /// \returns reference to this reader
+ ///
+ GenomicIntervalCompositeBamReader& Interval(const GenomicInterval& interval);
+
+ /// \returns the current specified interval
+ ///
+ const GenomicInterval& Interval(void) const;
+
+ /// \}
+
+private:
+ // NOTE(review): presumably re-orders mergeItems_ - confirm against the
+ // definition.
+ void UpdateSort(void);
+
+private:
+ // the interval of interest (see Interval())
+ GenomicInterval interval_;
+ // per-file readers, each paired with its "next" record
+ std::deque<internal::CompositeMergeItem> mergeItems_;
+ // NOTE(review): presumably the names of the input files - confirm
+ std::vector<std::string> filenames_;
+};
+
+/// \brief Provides read access to multiple %BAM files, limiting results to
+/// those passing a PbiFilter.
+///
+/// Requires a ".pbi" file for each input %BAM file.
+///
+/// \note The template parameter OrderByType is not fully implemented at this
+/// time. Using a comparison functor (e.g. Compare::Zmw) for it will
+/// currently yield the proper "next" value <b> at each iteration
+/// step, independently, but not over the full data set. </b> If all
+/// files' "order-by" data values are accessible in increasing order
+/// within each file, then the expected ordering will be observed.
+/// However, if these data are not sorted within a file, the final results
+/// will appear unordered. \n
+/// \n
+/// Example:\n
+/// file 1: { 1, 5, 2, 6 } \n
+/// file 2: { 3, 8, 4, 7 } \n
+/// results: { 1, 3, 5, 2, 6, 8, 4, 7 } \n
+/// \n
+/// This is a known issue and will be addressed in a future update. But in
+/// the meantime, use of Compare::None as the OrderByType is recommended,
+/// to explicitly indicate that no particular ordering is expected.
+///
+template<typename OrderByType>
+class PBBAM_EXPORT PbiFilterCompositeBamReader
+{
+public:
+ using value_type = internal::CompositeMergeItem;
+ using merge_sorter_type = internal::CompositeMergeItemSorter<OrderByType>;
+ using container_type = std::deque<value_type>;
+ using iterator = typename container_type::iterator;
+ using const_iterator = typename container_type::const_iterator;
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// Creates a reader over \p bamFiles, limited to records passing
+ /// \p filter.
+ PbiFilterCompositeBamReader(const PbiFilter& filter,
+ const std::vector<BamFile>& bamFiles);
+ /// Same as above, taking the file list as an rvalue.
+ PbiFilterCompositeBamReader(const PbiFilter& filter,
+ std::vector<BamFile>&& bamFiles);
+ /// Creates a reader limited to records passing \p filter, with input
+ /// taken from \p dataset.
+ PbiFilterCompositeBamReader(const PbiFilter& filter,
+ const DataSet& dataset);
+
+ /// \}
+
+public:
+ /// \name Data Access
+ /// \{
+
+ /// Fetches the next BAM record passing the current filter.
+ ///
+ /// \param[out] record
+ /// \returns true on success, false if no more data available.
+ ///
+ bool GetNext(BamRecord& record);
+
+ /// Sets a new PBI filter
+ ///
+ /// \param[in] filter new PBI filter
+ /// \returns reference to this reader
+ ///
+ PbiFilterCompositeBamReader& Filter(const PbiFilter& filter);
+
+ /// \}
+
+private:
+ void UpdateSort();
+
+private:
+ container_type mergeQueue_;
+ std::vector<std::string> filenames_;
+};
+
+/// \brief The SequentialCompositeBamReader class provides read access to
+/// multiple %BAM files, reading through the entire contents of each
+/// file.
+///
+/// Input files will be accessed in the order provided to the constructor. Each
+/// file's contents will be exhausted before moving on to the next one (as
+/// opposed to a "round-robin" scheme).
+///
+class PBBAM_EXPORT SequentialCompositeBamReader
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ // NOTE(review): these single-argument constructors are not marked
+ // 'explicit', so implicit conversions from their argument types are
+ // permitted.
+
+ /// Creates a reader over \p bamFiles, read in the order given.
+ SequentialCompositeBamReader(const std::vector<BamFile>& bamFiles);
+ /// Same as above, taking the file list as an rvalue.
+ SequentialCompositeBamReader(std::vector<BamFile>&& bamFiles);
+ /// Creates a reader with input taken from \p dataset.
+ SequentialCompositeBamReader(const DataSet& dataset);
+
+ /// \}
+
+public:
+ /// \name Data Access
+ /// \{
+
+ /// Fetches next BAM record from the input files.
+ ///
+ /// \param[out] record
+ /// \returns true on success, false if no more data available.
+ ///
+ bool GetNext(BamRecord& record);
+
+ /// \}
+
+private:
+ // one reader per input file
+ std::deque<std::unique_ptr<BamReader> > readers_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/CompositeBamReader.inl"
+
+#endif // COMPOSITEBAMREADER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Config.h
+/// \brief Defines library-wide macros & global variables.
+//
+// Author: Derek Barnett
+
+#ifndef PBBAM_CONFIG_H
+#define PBBAM_CONFIG_H
+
+#include <cstdint>
+
+// Fallback definitions for the fixed-width integer limit macros. Each one is
+// only defined when <cstdint> has not already provided it. Negative values are
+// parenthesized so every *_MIN macro expands to one self-contained expression.
+#ifndef INT8_MAX
+#define INT8_MAX 127
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX 32767
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX 2147483647
+#endif
+#ifndef INT64_MAX
+#define INT64_MAX 9223372036854775807LL
+#endif
+#ifndef INT8_MIN
+#define INT8_MIN (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN (-32768)
+#endif
+// Written as (-MAX-1): the positive literal would not fit the signed type.
+#ifndef INT32_MIN
+#define INT32_MIN (-INT32_MAX-1)
+#endif
+#ifndef INT64_MIN
+#define INT64_MIN (-INT64_MAX-1)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX 255
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX 65535
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX 4294967295U
+#endif
+#ifndef UINT64_MAX
+#define UINT64_MAX 18446744073709551615ULL
+#endif
+
+/// \name Library Import/Export
+/// \{
+
+// Symbol marked for export when building the library itself.
+// Windows compilers predefine _WIN32; plain WIN32 is only set by some
+// project/SDK configurations, so either spelling selects __declspec.
+#ifndef PBBAM_LIBRARY_EXPORT
+# if defined(_WIN32) || defined(WIN32)
+# define PBBAM_LIBRARY_EXPORT __declspec(dllexport)
+# else
+# define PBBAM_LIBRARY_EXPORT __attribute__((visibility("default")))
+# endif
+#endif
+
+// Symbol marked for import when consuming the library (empty off Windows).
+#ifndef PBBAM_LIBRARY_IMPORT
+# if defined(_WIN32) || defined(WIN32)
+# define PBBAM_LIBRARY_IMPORT __declspec(dllimport)
+# else
+# define PBBAM_LIBRARY_IMPORT
+# endif
+#endif
+
+// PBBAM_EXPORT resolves to the export form when PBBAM_LIBRARY is defined,
+// and to the import form otherwise.
+#ifndef PBBAM_EXPORT
+# if defined(PBBAM_LIBRARY)
+# define PBBAM_EXPORT PBBAM_LIBRARY_EXPORT
+# else
+# define PBBAM_EXPORT PBBAM_LIBRARY_IMPORT
+# endif
+#endif
+
+/// \}
+
+/// \name Shared Pointer Settings
+/// \{
+
+// uncomment this define, or pass via command-line (-DPBBAM_USE_BOOST_SHARED_PTR),
+// to use boost::shared_ptr<T> instead of std::shared_ptr<T>
+//
+//#define PBBAM_USE_BOOST_SHARED_PTR
+
+#ifdef PBBAM_USE_BOOST_SHARED_PTR
+# include <boost/smart_ptr/shared_ptr.hpp>
+# define PBBAM_SHARED_PTR boost::shared_ptr
+#else
+# include <memory>
+# define PBBAM_SHARED_PTR std::shared_ptr
+#endif
+
+/// \}
+
+/// \name Class Definition Helpers
+/// \{
+
+/// \brief Disables the use of copy constructors and assignment operators for a
+/// class.
+///
+/// To use, place the macro in a class's private section:
+/// \code{.cpp}
+/// struct Foo {
+/// private:
+/// DISABLE_COPY(Foo);
+/// };
+/// \endcode
+///
+#ifndef DISABLE_COPY
+#define DISABLE_COPY(Class) \
+ Class(const Class&); \
+ Class& operator=(const Class&)
+#endif
+
+/// \brief Disables the use of move constructors and assignment operators for a
+/// class.
+///
+/// To use, place the macro in a class's private section:
+/// \code{.cpp}
+/// struct Foo {
+/// private:
+/// DISABLE_MOVE(Foo);
+/// };
+/// \endcode
+///
+/// The macro only declares the move constructor and move assignment operator;
+/// it does not define them.
+///
+/// \note Unlike DISABLE_COPY, this expansion already ends with a semicolon, so
+/// the usage shown above leaves an extra (empty) ';' behind.
+/// DISABLE_MOVE_AND_COPY relies on that trailing semicolon.
+///
+#ifndef DISABLE_MOVE
+#define DISABLE_MOVE(Class) \
+ Class(Class&&); \
+ Class& operator=(Class&&);
+#endif
+
+/// \brief Disables the use of copy & move constructors and assignment operators
+/// for a class.
+///
+/// To use, place the macro in a class's private section:
+/// \code{.cpp}
+/// struct Foo {
+/// private:
+/// DISABLE_MOVE_AND_COPY(Foo);
+/// };
+/// \endcode
+///
+/// \note No separator is written between the two nested macros; the
+/// expansion is well-formed because DISABLE_MOVE ends with its own
+/// semicolon.
+///
+#ifndef DISABLE_MOVE_AND_COPY
+#define DISABLE_MOVE_AND_COPY(Class) \
+ DISABLE_MOVE(Class) \
+ DISABLE_COPY(Class)
+#endif
+
+/// \}
+
+/// \name Auto-validation
+/// \{
+
+/// \brief Compile-time switch for auto-validation; defaults to 0 unless
+/// already defined.
+///
+/// To validate BAM components (header, records, etc.) you can either use the
+/// Validator API provided, or enable auto-validation. To compile pbbam for
+/// auto-validation, add the -DPacBioBAM_auto_validate=ON option to your cmake
+/// invocation.
+///
+#ifndef PBBAM_AUTOVALIDATE
+# define PBBAM_AUTOVALIDATE 0
+#endif
+
+/// \}
+
+namespace PacBio {
+namespace BAM {
+
+/// \name Verbosity Settings
+/// \{
+
+/// \brief Sets the desired verbosity level of htslib warnings.
+///
+/// Change this value to allow debug/warning statements from htslib itself.
+/// The valid range seems to be [0-3], where 0 indicates OFF, and 3 is the
+/// most verbose.
+///
+/// By default, pbbam disables htslib statements to keep output channels clean.
+/// We rely on exceptions & their associated messages instead.
+///
+/// This global variable is obviously not thread-safe by any means. But as a
+/// debug flag, it is unlikely to cause any real issues. The worst case would be
+/// unexpected presence/absence of output statements.
+///
+extern int HtslibVerbosity;
+
+/// \}
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // PBBAM_CONFIG_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file DataSet.h
+/// \brief Defines the DataSet class.
+//
+// Author: Derek Barnett
+
+#ifndef DATASET_H
+#define DATASET_H
+
+#include "pbbam/BamFile.h"
+#include "pbbam/Config.h"
+#include "pbbam/DataSetTypes.h"
+#include <chrono>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The DataSet class represents a %PacBio analysis dataset (e.g. from
+/// XML).
+///
+/// \nosubgrouping
+///
+/// It provides resource paths, filters, and metadata associated with a dataset
+/// under analysis.
+///
+class PBBAM_EXPORT DataSet
+{
+public:
+ /// \name DataSet Type
+ /// \{
+
+ /// \brief This enum defines the currently-supported DataSet types.
+ ///
+ enum TypeEnum {
+ GENERIC = 0
+ , ALIGNMENT
+ , BARCODE
+ , CONSENSUS_ALIGNMENT
+ , CONSENSUS_READ
+ , CONTIG
+ , HDF_SUBREAD
+ , REFERENCE
+ , SUBREAD
+ };
+
+ /// \brief Converts printable dataset type to type enum.
+ ///
+ /// \param[in] typeName printable dataset type
+ /// \returns dataset type enum
+ /// \throws std::runtime_error if \p typeName is unknown
+ ///
+ static DataSet::TypeEnum NameToType(const std::string& typeName);
+
+ /// \brief Converts dataset type enum to printable name.
+ ///
+ /// \param[in] type dataset type enum
+ /// \returns printable dataset type
+ /// \throws std::runtime_error if \p type is unknown
+ ///
+ static std::string TypeToName(const DataSet::TypeEnum& type);
+
+ /// \}
+
+public:
+
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Constructs an empty, generic DataSet.
+ ///
+ DataSet(void);
+
+ /// \brief Constructs an empty DataSet of the type specified.
+ ///
+ /// \param[in] type dataset type
+ /// \throws std::runtime_error if \p type is unknown
+ ///
+ DataSet(const DataSet::TypeEnum type);
+
+ /// \brief Constructs a DataSet from a %BAM file.
+ ///
+ /// This currently defaults to a SubreadSet, with an ExternalResource
+ /// pointing to BamFile::Filename.
+ ///
+ /// \param[in] bamFile BamFile object
+ ///
+ DataSet(const BamFile& bamFile);
+
+ /// \brief Loads a DataSet from a file.
+ ///
+ /// \p filename may be one of three types, indicated by its extension:\n
+ /// - %BAM ("*.bam") \n
+ /// - FOFN ("*.fofn") \n
+ /// - DataSetXML ("*.xml") \n
+ ///
+ /// \param[in] filename input filename
+ /// \throws std::runtime_error if \p filename has an unsupported extension,
+ /// or if a valid DataSet could not be created from its contents
+ ///
+ DataSet(const std::string& filename);
+
+ /// \brief Constructs a DataSet from a list of files.
+ ///
+ /// \param[in] filenames input filenames
+ /// \throws std::runtime_error if DataSet could not be created from
+ /// \p filenames
+ ///
+ DataSet(const std::vector<std::string>& filenames);
+
+ DataSet(const DataSet& other); ///< Copy constructor.
+ DataSet(DataSet&& other); ///< Move constructor.
+ DataSet& operator=(const DataSet& other); ///< Copy assignment operator.
+ DataSet& operator=(DataSet&& other); ///< Move assignment operator.
+ ~DataSet(void); ///< Destructor.
+
+ /// \brief Creates a DataSet from "raw" XML data.
+ ///
+ /// \param[in] xml DataSetXML text
+ /// \returns DataSet created from \p xml
+ static DataSet FromXml(const std::string& xml);
+
+ /// \}
+
+public:
+ /// \name Operators
+ /// \{
+
+ /// \brief Merges DataSet contents.
+ ///
+ /// Adds contents of \p other to this dataset object
+ ///
+ /// \param[in] other some other dataset to add to this one
+ /// \returns reference to this dataset object
+ ///
+ DataSet& operator+=(const DataSet& other);
+
+ /// \}
+
+public:
+ /// \name Serialization
+ /// \{
+
+ /// \brief Saves dataset XML to file.
+ ///
+ /// \param[in] outputFilename destination for XML contents
+ ///
+ /// \throws std::runtime_error if file could not be opened or if DataSet
+ /// elements could not be converted to XML
+ ///
+ void Save(const std::string& outputFilename);
+
+ /// \brief Saves dataset XML to output stream, e.g. std::cout,
+ /// std::stringstream.
+ ///
+ /// \param[out] out destination for XML contents
+ ///
+ /// \throws std::runtime_error if DataSet elements could not be converted to
+ /// XML
+ ///
+ void SaveToStream(std::ostream& out);
+
+ /// \}
+
+public:
+
+ /// \name Attributes
+ /// \{
+ ///
+
+ /// \brief Fetches the value of a DataSet root element's attribute.
+ ///
+ /// These are the attributes attached to the root dataset element: \n
+ /// \verbatim <SubreadSet foo="x" bar="y" /> \endverbatim
+ ///
+ /// Built-in accessors exist for the standard attributes (e.g. CreatedAt)
+ /// but additional attributes can be used as well via these generic
+ /// Attribute methods.
+ ///
+ /// \param[in] name root element's attribute name
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
+ const std::string& Attribute(const std::string& name) const;
+
+ /// \brief Fetches the value of dataset's CreatedAt attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
+ const std::string& CreatedAt(void) const;
+
+ /// \brief Fetches the value of dataset's Format attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
+ const std::string& Format(void) const;
+
+ /// \brief Fetches the value of dataset's MetaType attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
+ const std::string& MetaType(void) const;
+
+ /// \brief Fetches the value of dataset's ModifiedAt attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
+ const std::string& ModifiedAt(void) const;
+
+ /// \brief Fetches the value of dataset's Name attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
+ const std::string& Name(void) const;
+
+ /// \brief Fetches the value of dataset's ResourceId attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
+ const std::string& ResourceId(void) const;
+
+ /// \brief Fetches the value of dataset's Tags attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
+ const std::string& Tags(void) const;
+
+ /// \brief Fetches the value of dataset's TimeStampedName attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
+ const std::string& TimeStampedName(void) const;
+
+ /// \brief Fetches the value of dataset's UniqueId attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
+ const std::string& UniqueId(void) const;
+
+ /// \brief Fetches the value of dataset's Version attribute.
+ ///
+ /// \returns const reference to attribute's value (empty string if not
+ /// present)
+ ///
+ const std::string& Version(void) const;
+
+ /// \}
+
+public:
+ /// \name DataSet Type
+ /// \{
+
+ /// \brief Fetches the dataset's type.
+ ///
+ /// \returns dataset type enum
+ ///
+ PacBio::BAM::DataSet::TypeEnum Type(void) const;
+
+ /// \brief Fetches the dataset's type as a printable name.
+ ///
+ /// \returns printable dataset type
+ ///
+ std::string TypeName(void) const;
+
+ /// \}
+
+public:
+ /// \name Child Elements
+ /// \{
+
+ /// \brief Fetches the dataset's Extensions element.
+ ///
+ /// \returns const reference to child element
+ /// \throws std::runtime_error if element does not exist
+ ///
+ const PacBio::BAM::Extensions& Extensions(void) const;
+
+ /// \brief Fetches the dataset's ExternalResources element.
+ ///
+ /// \returns const reference to child element
+ /// \throws std::runtime_error if element does not exist
+ ///
+ const PacBio::BAM::ExternalResources& ExternalResources(void) const;
+
+ /// \brief Fetches the dataset's Filters element.
+ ///
+ /// \returns const reference to child element
+ ///
+ const PacBio::BAM::Filters& Filters(void) const;
+
+ /// \brief Fetches the dataset's DataSetMetadata element.
+ ///
+ /// \returns const reference to child element
+ ///
+ const PacBio::BAM::DataSetMetadata& Metadata(void) const;
+
+ /// \brief Fetches the dataset's DataSets element.
+ ///
+ /// \returns const reference to child element
+ ///
+ const PacBio::BAM::SubDataSets& SubDataSets(void) const;
+
+ /// \}
+
+public:
+ /// \name Resource Handling
+ /// \{
+
+ /// \brief Returns this dataset's primary %BAM resources, with relative
+ /// filepaths already resolved.
+ ///
+ /// Primary resources are those listed as top-level %ExternalResources, not
+ /// associated files (indices, references, scraps %BAMs, etc.).
+ ///
+ /// \returns vector of BamFiles
+ ///
+ /// \sa DataSet::ResolvedResourceIds
+ ///
+ std::vector<BamFile> BamFiles(void) const;
+
+ /// \brief Returns all primary external resource filepaths, with relative
+ /// paths resolved.
+ ///
+ /// Primary resources are those listed as top-level %ExternalResources, not
+ /// associated files (indices, references, scraps %BAMs, etc.).
+ ///
+ /// \sa ResolvePath
+ ///
+ /// \returns resourceIds
+ ///
+ std::vector<std::string> ResolvedResourceIds(void) const;
+
+ /// \brief Resolves a filepath (that may be relative to the dataset).
+ ///
+ /// A DataSet's resources may be described using absolute filepaths or with
+ /// relative paths. For absolute paths, nothing is changed from the input.
+ /// For relative paths, these are resolved using the DataSet's own path
+ /// as a starting point. A DataSet's own path will be one of:\n
+ /// 1 - the location of its XML or %BAM input file, e.g. created using
+ /// DataSet("foo.xml") or DataSet("foo.bam")\n
+ /// 2 - application's current working directory for all other DataSet
+ /// construction methods { DataSet(), DataSet(type),
+ /// DataSet("foo.fofn") }\n
+ ///
+ /// \param[in] originalPath input file path (absolute or relative)
+ /// \returns resolved path
+ ///
+ std::string ResolvePath(const std::string& originalPath) const;
+
+ /// \returns sequence chemistry info for all read groups in this dataset
+ ///
+ /// \sa ReadGroupInfo::SequencingChemistry
+ ///
+ std::set<std::string> SequencingChemistries(void) const;
+
+ /// \}
+
+public:
+ /// \name XML Namespace Handling
+ /// \{
+
+ /// \brief Access this dataset's namespace info.
+ ///
+ /// \returns const reference to dataset's NamespaceRegistry
+ ///
+ const NamespaceRegistry& Namespaces(void) const;
+
+ /// \}
+
+public:
+ /// \name Attributes
+ /// \{
+
+ /// \brief Fetches the value of a DataSet root element's attribute.
+ ///
+ /// These are the attributes attached to the root dataset element: \n
+ /// \verbatim <SubreadSet foo="x" bar="y" /> \endverbatim
+ ///
+ /// Built-in accessors exist for the standard attributes (e.g. CreatedAt)
+ /// but additional attributes can be used as well via these generic methods.
+ ///
+ /// A new attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] name root element's attribute name
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
+ std::string& Attribute(const std::string& name);
+
+ /// \brief Fetches the value of dataset's CreatedAt attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
+ std::string& CreatedAt(void);
+
+ /// \brief Fetches the value of dataset's Format attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
+ std::string& Format(void);
+
+ /// \brief Fetches the value of dataset's MetaType attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
+ std::string& MetaType(void);
+
+ /// \brief Fetches the value of dataset's ModifiedAt attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
+ std::string& ModifiedAt(void);
+
+ /// \brief Fetches the value of dataset's Name attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
+ std::string& Name(void);
+
+ /// \brief Fetches the value of dataset's ResourceId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
+ std::string& ResourceId(void);
+
+ /// \brief Fetches the value of dataset's Tags attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
+ std::string& Tags(void);
+
+ /// \brief Fetches the value of dataset's TimeStampedName attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
+ std::string& TimeStampedName(void);
+
+ /// \brief Fetches the value of dataset's UniqueId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
+ std::string& UniqueId(void);
+
+ /// \brief Fetches the value of dataset's Version attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute's value (empty string if this
+ /// is a new attribute)
+ ///
+ std::string& Version(void);
+
+ /// \}
+
+public:
+ /// \name Attributes
+ /// \{
+
+ /// \brief Sets this dataset's XML attribute \p name, with \p value
+ ///
+ /// These are the attributes attached to the root dataset element: \n
+ /// \verbatim <SubreadSet foo="x" bar="y" /> \endverbatim
+ ///
+ /// Built-in accessors exist for the standard attributes (e.g. CreatedAt)
+ /// but additional attributes can be used as well via these generic methods.
+ ///
+ /// The attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] name root element's attribute name
+ /// \param[in] value new value for the attribute
+ /// \returns reference to this dataset object
+ ///
+ DataSet& Attribute(const std::string& name, const std::string& value);
+
+ /// \brief Sets this dataset's CreatedAt attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] createdAt new value for the attribute
+ /// \returns reference to this dataset object
+ ///
+ DataSet& CreatedAt(const std::string& createdAt);
+
+ /// \brief Sets this dataset's Format attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] format new value for the attribute
+ /// \returns reference to this dataset object
+ ///
+ DataSet& Format(const std::string& format);
+
+ /// \brief Sets this dataset's MetaType attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] metatype new value for the attribute
+ /// \returns reference to this dataset object
+ ///
+ DataSet& MetaType(const std::string& metatype);
+
+ /// \brief Sets this dataset's ModifiedAt attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] modifiedAt new value for the attribute
+ /// \returns reference to this dataset object
+ ///
+ DataSet& ModifiedAt(const std::string& modifiedAt);
+
+ /// \brief Sets this dataset's Name attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] name new value for the attribute
+ /// \returns reference to this dataset object
+ ///
+ DataSet& Name(const std::string& name);
+
+ /// \brief Sets this dataset's ResourceId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] resourceId new value for the attribute
+ /// \returns reference to this dataset object
+ ///
+ DataSet& ResourceId(const std::string& resourceId);
+
+ /// \brief Sets this dataset's Tags attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] tags new value for the attribute
+ /// \returns reference to this dataset object
+ ///
+ DataSet& Tags(const std::string& tags);
+
+ /// \brief Sets this dataset's TimeStampedName attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] timeStampedName new value for the attribute
+ /// \returns reference to this dataset object
+ ///
+ DataSet& TimeStampedName(const std::string& timeStampedName);
+
+ /// \brief Sets this dataset's UniqueId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] uuid new value for the attribute
+ /// \returns reference to this dataset object
+ ///
+ DataSet& UniqueId(const std::string& uuid);
+
+ /// \brief Sets this dataset's Version attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] version new value for the attribute
+ /// \returns reference to this dataset object
+ ///
+ DataSet& Version(const std::string& version);
+
+ /// \}
+
+public:
+ /// \name DataSet Type
+ /// \{
+
+ /// \brief Edits dataset type.
+ ///
+ /// \param[in] type new dataset type
+ /// \returns reference to this dataset object
+ ///
+ DataSet& Type(const PacBio::BAM::DataSet::TypeEnum type);
+
+ /// \}
+
+public:
+ /// \name Child Elements
+ /// \{
+
+ /// \brief Fetches the dataset's Extensions element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
+ PacBio::BAM::Extensions& Extensions(void);
+
+ /// \brief Fetches the dataset's ExternalResources element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
+ PacBio::BAM::ExternalResources& ExternalResources(void);
+
+ /// \brief Fetches the dataset's Filters element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
+ PacBio::BAM::Filters& Filters(void);
+
+ /// \brief Fetches the dataset's DataSetMetadata element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
+ PacBio::BAM::DataSetMetadata& Metadata(void);
+
+ /// \brief Fetches the dataset's DataSets element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
+ PacBio::BAM::SubDataSets& SubDataSets(void);
+
+ /// \}
+
+public:
+ /// \name Child Elements
+ /// \{
+
+ /// \brief Sets this dataset's Extensions element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] extensions new value for the element
+ /// \returns reference to this dataset object
+ ///
+ DataSet& Extensions(const PacBio::BAM::Extensions& extensions);
+
+ /// \brief Sets this dataset's ExternalResources element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] resources new value for the element
+ /// \returns reference to this dataset object
+ ///
+ DataSet& ExternalResources(const PacBio::BAM::ExternalResources& resources);
+
+ /// \brief Sets this dataset's Filters element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] filters new value for the element
+ /// \returns reference to this dataset object
+ ///
+ DataSet& Filters(const PacBio::BAM::Filters& filters);
+
+ /// \brief Sets this dataset's DataSetMetadata element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] metadata new value for the element
+ /// \returns reference to this dataset object
+ ///
+ DataSet& Metadata(const PacBio::BAM::DataSetMetadata& metadata);
+
+ /// \brief Sets this dataset's DataSets element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] subdatasets new value for the element
+ /// \returns reference to this dataset object
+ ///
+ DataSet& SubDataSets(const PacBio::BAM::SubDataSets& subdatasets);
+
+ /// \}
+
+public:
+ /// \name XML Namespace Handling
+ /// \{
+
+ /// \brief Access this dataset's namespace info.
+ ///
+ /// \returns non-const reference to dataset's NamespaceRegistry
+ ///
+ NamespaceRegistry& Namespaces(void);
+
+ /// \}
+
+private:
+ std::unique_ptr<DataSetBase> d_; // uniquely-owned DataSetBase (type not declared in this header)
+ std::string path_; // NOTE(review): presumably the dataset's own path used by ResolvePath - confirm
+};
+
+/// \name DataSet Timestamp Utilities
+/// \{
+
+/// \brief Fetches current time, in "DataSetXML format".
+///
+/// \returns DataSetXML formatted timestamp
+///
+/// \sa ToDataSetFormat
+///
+PBBAM_EXPORT std::string CurrentTimestamp(void);
+
+/// \brief Converts a time_point to "DataSetXML-formatted" timestamp.
+///
+/// This is the format used as a component of the DataSet::TimeStampedName
+/// (yymmdd_HHmmssttt).
+/// \param[in] tp time point to convert
+/// \returns "DataSetXML-formatted" timestamp
+///
+PBBAM_EXPORT std::string ToDataSetFormat(const std::chrono::system_clock::time_point& tp);
+
+/// \brief Converts a time_t to "DataSetXML-formatted" timestamp.
+///
+/// This is the format used as a component of the DataSet::TimeStampedName
+/// (yymmdd_HHmmssttt).
+/// \param[in] tp time_t value to convert
+/// \returns "DataSetXML-formatted" timestamp
+///
+PBBAM_EXPORT std::string ToDataSetFormat(const time_t& tp);
+
+/// \brief Converts a time_point to ISO-8601 formatted timestamp.
+///
+/// This is the format used in DataSet::CreatedAt and DataSet::ModifiedAt.
+/// \param[in] tp time point to convert
+/// \returns ISO-8601 formatted timestamp
+///
+PBBAM_EXPORT std::string ToIso8601(const std::chrono::system_clock::time_point& tp);
+
+/// \brief Converts a time_t to ISO-8601 formatted timestamp.
+///
+/// This is the format used in DataSet::CreatedAt and DataSet::ModifiedAt.
+/// \param[in] t time_t value to convert
+/// \returns ISO-8601 formatted timestamp
+///
+PBBAM_EXPORT std::string ToIso8601(const time_t& t);
+
+/// \}
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/DataSet.inl"
+
+#endif // DATASET_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file DataSetTypes.h
+/// \brief Defines the public DataSet component classes.
+//
+// Author: Derek Barnett
+
+#ifndef DATASETTYPES_H
+#define DATASETTYPES_H
+
+#include "pbbam/BamFile.h"
+#include "pbbam/Config.h"
+#include "pbbam/DataSetXsd.h"
+#include "pbbam/internal/DataSetBaseTypes.h"
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The DataSetMetadata class represents the %DataSetMetadata child
+/// element in DataSetXML.
+///
+/// A few top-level elements are built-in, but as pbbam is not primarily a
+/// DataSetXML API, most of the metadata hierarchy needs to be manually managed.
+///
+class PBBAM_EXPORT DataSetMetadata : public internal::DataSetElement
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Constructs a DataSetMetadata from its required fields, \p numRecords and \p totalLength.
+ DataSetMetadata(const std::string& numRecords,
+ const std::string& totalLength);
+
+ /// \}
+
+public:
+ /// \name Operators
+ /// \{
+
+ /// \brief Merges DataSetMetadata contents.
+ ///
+ /// Adds contents of \p other to this metadata object
+ ///
+ /// \param[in] other some other metadata to add to this one
+ /// \returns reference to this object
+ ///
+ DataSetMetadata& operator+=(const DataSetMetadata& other);
+
+ /// \}
+
+public:
+ /// \name Child Elements
+ /// \{
+
+ /// \brief Fetches the text of the NumRecords element.
+ ///
+ /// \returns const reference to element text (empty string if not present)
+ ///
+ const std::string& NumRecords(void) const;
+
+ /// \brief Fetches the text of the TotalLength element.
+ ///
+ /// \returns const reference to element text (empty string if not present)
+ ///
+ const std::string& TotalLength(void) const;
+
+ /// \brief Fetches the Provenance element.
+ ///
+ /// \returns const reference to child element
+ /// \throws std::runtime_error if element does not exist
+ ///
+ const PacBio::BAM::Provenance& Provenance(void) const;
+
+ /// \}
+
+public:
+ /// \name Child Elements
+ /// \{
+
+ /// \brief Fetches the text of the NumRecords element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to element text
+ ///
+ std::string& NumRecords(void);
+
+ /// \brief Fetches the text of the TotalLength element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to element text
+ ///
+ std::string& TotalLength(void);
+
+ /// \brief Fetches Provenance element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
+ PacBio::BAM::Provenance& Provenance(void);
+
+ /// \}
+
+public:
+ /// \name Child Elements
+ /// \{
+
+ /// \brief Sets the text of the NumRecords element.
+ ///
+ /// This element will be created if it does not yet exist.
+ /// \param[in] numRecords new text for the element
+ /// \returns reference to this metadata object
+ ///
+ DataSetMetadata& NumRecords(const std::string& numRecords);
+
+ /// \brief Sets the text of the TotalLength element.
+ ///
+ /// This element will be created if it does not yet exist.
+ /// \param[in] totalLength new text for the element
+ /// \returns reference to this metadata object
+ ///
+ DataSetMetadata& TotalLength(const std::string& totalLength);
+
+ /// \brief Sets the Provenance child element.
+ ///
+ /// This element will be created if it does not yet exist.
+ /// \param[in] provenance new value for the element
+ /// \returns reference to this metadata object
+ ///
+ DataSetMetadata& Provenance(const PacBio::BAM::Provenance& provenance);
+
+ /// \}
+};
+
+/// \brief The ExtensionElement class represents an %ExtensionElement element in
+/// DataSetXML.
+///
+class PBBAM_EXPORT ExtensionElement : public internal::DataSetElement {
+public:
+ ExtensionElement(void); ///< Default constructor.
+};
+
+/// \brief The Extensions class represents an %Extensions element in DataSetXML.
+///
+/// The Extensions element is essentially just a list of ExtensionElement
+/// objects.
+///
+class PBBAM_EXPORT Extensions : public internal::DataSetListElement<ExtensionElement>
+{
+public:
+ /// \brief Default constructor; creates an empty extensions list.
+ Extensions(void);
+};
+
+class ExternalResources; // forward declaration; the class is defined after ExternalResource
+
+/// \brief The ExternalResource class represents an %ExternalResource element in
+/// DataSetXML.
+///
+/// An ExternalResource can itself have a child element, ExternalResources, that
+/// lists related files (e.g. index files).
+///
+class PBBAM_EXPORT ExternalResource : public internal::IndexedDataType
+{
+public:
+ /// \brief Creates an ExternalResource from a BamFile object.
+ ///
+ /// The metatype & resourceId are automatically set.
+ ///
+ ExternalResource(const BamFile& bamFile);
+
+ /// \brief Creates an ExternalResource from a metatype and a resource ID.
+ /// \param[in] metatype metatype of the resource
+ /// \param[in] filename resource filename, used as the resource ID
+ ExternalResource(const std::string& metatype,
+ const std::string& filename);
+
+public:
+ /// \brief Fetches the resource's ExternalResources child element.
+ ///
+ /// \returns const reference to child element
+ /// \throws std::runtime_error if element does not exist
+ ///
+ const PacBio::BAM::ExternalResources& ExternalResources(void) const;
+
+public:
+ /// \brief Fetches the resource's ExternalResources child element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
+ PacBio::BAM::ExternalResources& ExternalResources(void);
+
+ /// \brief Sets this resource's ExternalResources child element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] resources new value for the element
+ /// \returns reference to this resource object
+ ///
+ ExternalResource& ExternalResources(const PacBio::BAM::ExternalResources& resources);
+
+public:
+ /// \brief Converts an ExternalResource to a BamFile object
+ ///
+ /// \returns corresponding BamFile object for this ExternalResource
+ /// \throws std::runtime_error if fails to open %BAM file (e.g. does not
+ /// exist, not a %BAM file, etc.)
+ ///
+ /// \deprecated Use the results from DataSet::BamFiles instead. This method
+ /// cannot resolve relative filepaths and will be removed in the
+ /// near future.
+ ///
+ BamFile ToBamFile(void) const;
+};
+
+/// \brief The ExternalResources class represents an %ExternalResources element
+/// in DataSetXML.
+///
+/// The ExternalResources element is essentially just a list of ExternalResource
+/// elements.
+///
+class PBBAM_EXPORT ExternalResources : public internal::DataSetListElement<ExternalResource>
+{
+public:
+ /// \brief Creates an empty resource list.
+ ExternalResources(void);
+
+ /// \brief Merges \p other resource list with this one.
+ ExternalResources& operator+=(const ExternalResources& other);
+
+public:
+ /// \brief Adds an ExternalResource to this list.
+ void Add(const ExternalResource& ext);
+
+ /// \brief Removes an ExternalResource from this list.
+ void Remove(const ExternalResource& ext);
+
+public:
+ /// \brief Converts resource list to BamFile objects.
+ ///
+ /// \deprecated Use DataSet::BamFiles instead. This method cannot resolve
+ /// relative filepaths and will be removed in the near future.
+ /// \returns vector of BamFile objects
+ std::vector<BamFile> BamFiles(void) const;
+};
+
+/// \brief The FileIndex class represents a %FileIndex element in DataSetXML.
+///
+/// A FileIndex is used as an auxiliary to an ExternalResource, providing
+/// information about a data file's index file (e.g. for %BAM files, *.bai or
+/// *.pbi).
+///
+class PBBAM_EXPORT FileIndex : public internal::InputOutputDataType
+{
+public:
+ /// \brief Creates a FileIndex with provided \p metatype and \p filename as
+ /// resource ID.
+ ///
+ /// \param[in] metatype metatype of the index file
+ /// \param[in] filename index filename, used as the element's resource ID
+ ///
+ FileIndex(const std::string& metatype,
+ const std::string& filename);
+};
+
+/// \brief The FileIndices class represents a %FileIndices element in DataSetXML.
+///
+/// The FileIndices element is essentially just a list of FileIndex elements,
+/// providing information about a data file's index files (e.g. for %BAM files
+/// this will usually be *.bai and/or *.pbi).
+///
+class PBBAM_EXPORT FileIndices : public internal::DataSetListElement<FileIndex>
+{
+public:
+ /// \brief Creates an empty index list.
+ FileIndices(void);
+
+public:
+ /// \brief Adds a FileIndex to this list.
+ ///
+ /// \param[in] index index entry to add
+ ///
+ void Add(const FileIndex& index);
+
+ /// \brief Removes a FileIndex from this list.
+ ///
+ /// \param[in] index index entry to remove
+ ///
+ void Remove(const FileIndex& index);
+};
+
+/// \brief The Filter class represents a %Filter element in DataSetXML.
+///
+/// The Filter element allows analysis pipelines to describe filters on data
+/// that should be respected downstream, without needing to create filtered
+/// intermediate files.
+///
+/// A filter consists of a list of Property elements, each of which must be
+/// passed (logical AND) to pass the filter, e.g. property1 && property2 &&
+/// property3.
+///
+class PBBAM_EXPORT Filter : public internal::DataSetElement
+{
+public:
+ /// \brief Creates an empty filter.
+ Filter(void);
+
+public:
+ /// \brief Fetches the filter's Properties child element (read-only).
+ ///
+ /// \returns const reference to child element
+ /// \throws std::runtime_error if element does not exist
+ ///
+ const PacBio::BAM::Properties& Properties(void) const;
+
+public:
+ /// \brief Fetches the filter's Properties child element (editable).
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
+ PacBio::BAM::Properties& Properties(void);
+
+ /// \brief Sets this filter's Properties child element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] properties new value for the element
+ /// \returns reference to this filter object
+ ///
+ Filter& Properties(const PacBio::BAM::Properties& properties);
+};
+
+/// \brief The Filters class represents a %Filters list element in DataSetXML.
+///
+/// The Filters element is essentially a list of Filter elements. For analysis
+/// purposes, each filter is considered separately (logical OR) to determine
+/// which data passes, e.g. filter1 || filter2 || filter3.
+///
+class PBBAM_EXPORT Filters : public internal::DataSetListElement<Filter>
+{
+public:
+ /// \brief Creates an empty filter list.
+ Filters(void);
+
+ /// \brief Merges \p other filter list with this one.
+ ///
+ /// \param[in] other filter list to merge into this one
+ ///
+ Filters& operator+=(const Filters& other);
+
+public:
+ /// \brief Adds a filter to this list.
+ ///
+ /// \param[in] filter filter to add
+ ///
+ void Add(const Filter& filter);
+
+ /// \brief Removes a filter from this list.
+ ///
+ /// \param[in] filter filter to remove
+ ///
+ void Remove(const Filter& filter);
+};
+
+/// \brief The ParentTool class represents a %ParentTool element in DataSetXML.
+///
+/// It is exposed as a child element of Provenance (see
+/// Provenance::ParentTool). This class declares only a default constructor.
+///
+class PBBAM_EXPORT ParentTool : public internal::BaseEntityType {
+public:
+ /// \brief Creates an empty %ParentTool element.
+ ParentTool(void);
+};
+
+/// \brief The Property class represents a %Property element in DataSetXML.
+///
+/// A Property is the primary building block of %DataSetXML filtering. The
+/// %Property element describes a data record's property (or field), some value,
+/// and a comparison operator.
+///
+/// For example, one could filter all %BAM records with a read accuracy at or
+/// above 0.9. In C++ this could be constructed like:
+/// \code{.cpp}
+/// Property p("accuracy", "0.9", ">=");
+/// \endcode
+///
+class PBBAM_EXPORT Property : public internal::DataSetElement
+{
+public:
+ /// \brief Constructs a filter property.
+ ///
+ /// Note the argument order: the value comes before the operator.
+ ///
+ /// \param[in] name name of the record property (e.g. "accuracy")
+ /// \param[in] value value to compare against, as a string (e.g. "0.9")
+ /// \param[in] op comparison operator, as a string (e.g. ">=")
+ ///
+ Property(const std::string& name,
+ const std::string& value,
+ const std::string& op);
+
+public:
+
+ /// \brief Fetches the value of property's Name attribute.
+ ///
+ /// \returns const reference to attribute value
+ ///
+ const std::string& Name(void) const;
+
+ /// \brief Fetches the value of property's Operator attribute.
+ ///
+ /// \returns const reference to attribute value
+ ///
+ const std::string& Operator(void) const;
+
+ /// \brief Fetches the value of property's Value attribute.
+ ///
+ /// \returns const reference to attribute value
+ ///
+ const std::string& Value(void) const;
+
+public:
+
+ /// \brief Fetches the value of property's Name attribute.
+ ///
+ /// \returns non-const reference to attribute value
+ ///
+ std::string& Name(void);
+
+ /// \brief Fetches the value of property's Operator attribute.
+ ///
+ /// \returns non-const reference to attribute value
+ ///
+ std::string& Operator(void);
+
+ /// \brief Fetches the value of property's Value attribute.
+ ///
+ /// \returns non-const reference to attribute value
+ ///
+ std::string& Value(void);
+
+public:
+ /// \brief Sets this property's Name attribute.
+ ///
+ /// \param[in] name new value for the attribute
+ /// \returns reference to this property object
+ ///
+ Property& Name(const std::string& name);
+
+ /// \brief Sets this property's Operator attribute.
+ ///
+ /// \param[in] op new value for the attribute
+ /// \returns reference to this property object
+ ///
+ Property& Operator(const std::string& op);
+
+ /// \brief Sets this property's Value attribute.
+ ///
+ /// \param[in] value new value for the attribute
+ /// \returns reference to this property object
+ ///
+ Property& Value(const std::string& value);
+};
+
+/// \brief The Properties class represents a %Properties list element in
+/// DataSetXML.
+///
+/// The Properties element is essentially a list of Property elements.
+///
+class PBBAM_EXPORT Properties : public internal::DataSetListElement<Property>
+{
+public:
+ /// \brief Creates an empty property list.
+ Properties(void);
+
+public:
+ /// \brief Adds a property to this list.
+ ///
+ /// \param[in] property property to add
+ ///
+ void Add(const Property& property);
+
+ /// \brief Removes a property from this list.
+ ///
+ /// \param[in] property property to remove
+ ///
+ void Remove(const Property& property);
+};
+
+/// \brief The Provenance class represents a %Provenance element in DataSetXML.
+///
+/// It exposes four string attributes (CreatedBy, CommonServicesInstanceId,
+/// CreatorUserId, ParentJobId) and one child element (ParentTool), each with
+/// a const getter, a non-const getter, and a chainable setter.
+///
+class PBBAM_EXPORT Provenance : public internal::DataSetElement
+{
+public:
+ /// \brief Creates an empty provenance element.
+ Provenance(void);
+
+public:
+ /// \brief Fetches the value of CreatedBy attribute.
+ ///
+ /// \returns const reference to attribute value (empty string if not
+ /// present)
+ ///
+ const std::string& CreatedBy(void) const;
+
+ /// \brief Fetches the value of CommonServicesInstanceId attribute.
+ ///
+ /// \returns const reference to attribute value (empty string if not
+ /// present)
+ ///
+ const std::string& CommonServicesInstanceId(void) const;
+
+ /// \brief Fetches the value of CreatorUserId attribute.
+ ///
+ /// \returns const reference to attribute value (empty string if not
+ /// present)
+ ///
+ const std::string& CreatorUserId(void) const;
+
+ /// \brief Fetches the value of ParentJobId attribute.
+ ///
+ /// \returns const reference to attribute value (empty string if not
+ /// present)
+ ///
+ const std::string& ParentJobId(void) const;
+
+ /// \brief Fetches the ParentTool child element.
+ ///
+ /// \returns const reference to child element
+ /// \throws std::runtime_error if element does not exist
+ ///
+ const PacBio::BAM::ParentTool& ParentTool(void) const;
+
+public:
+
+ /// \brief Fetches the value of CreatedBy attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute value (empty string if this is
+ /// a new attribute)
+ ///
+ std::string& CreatedBy(void);
+
+ /// \brief Fetches the value of CommonServicesInstanceId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute value (empty string if this is
+ /// a new attribute)
+ ///
+ std::string& CommonServicesInstanceId(void);
+
+ /// \brief Fetches the value of CreatorUserId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute value (empty string if this is
+ /// a new attribute)
+ ///
+ std::string& CreatorUserId(void);
+
+ /// \brief Fetches the value of ParentJobId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to attribute value (empty string if this is
+ /// a new attribute)
+ ///
+ std::string& ParentJobId(void);
+
+ /// \brief Fetches the ParentTool child element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
+ PacBio::BAM::ParentTool& ParentTool(void);
+
+public:
+
+ /// \brief Sets the CreatedBy attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] createdBy new value for the attribute
+ /// \returns reference to this object
+ ///
+ Provenance& CreatedBy(const std::string& createdBy);
+
+ /// \brief Sets the CommonServicesInstanceId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] id new value for the attribute
+ /// \returns reference to this object
+ ///
+ Provenance& CommonServicesInstanceId(const std::string& id);
+
+ /// \brief Sets the CreatorUserId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] id new value for the attribute
+ /// \returns reference to this object
+ ///
+ Provenance& CreatorUserId(const std::string& id);
+
+ /// \brief Sets the ParentJobId attribute.
+ ///
+ /// This attribute will be created if it does not yet exist.
+ ///
+ /// \param[in] id new value for the attribute
+ /// \returns reference to this object
+ ///
+ Provenance& ParentJobId(const std::string& id);
+
+ /// \brief Sets the ParentTool child element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] tool new value for the element
+ /// \returns reference to this object
+ ///
+ Provenance& ParentTool(const PacBio::BAM::ParentTool& tool);
+};
+
+class SubDataSets;
+
+/// \brief The DataSetBase class provides the attributes & child elements shared
+/// by all dataset types.
+///
+/// Client code should not need to use this class directly. It should be
+/// considered as more of an implementation detail and may in fact be removed
+/// from public API in the future. The top-level DataSet is the recommended
+/// entry point.
+///
+class PBBAM_EXPORT DataSetBase : public internal::StrictEntityType
+{
+public:
+
+ /// \brief Creates a DataSetBase object, or one of its subclasses, from an
+ /// XML element name (e.g. SubreadSet)
+ ///
+ /// \param[in] typeName XML element name of the dataset type to create
+ /// \returns shared pointer to the newly created dataset object
+ ///
+ static std::shared_ptr<DataSetBase> Create(const std::string& typeName);
+
+public:
+ /// \brief Creates an empty, generic DataSetBase.
+ DataSetBase(void);
+
+protected:
+ /// \brief Creates a DataSetBase with key values initialized.
+ ///
+ /// Protected: intended for use by the concrete dataset subclasses.
+ ///
+ /// \param[in] metatype metatype of the dataset
+ /// \param[in] label XML element label of the dataset
+ /// \param[in] xsd XSD type associated with the dataset
+ ///
+ DataSetBase(const std::string& metatype,
+ const std::string& label,
+ const XsdType& xsd);
+
+ /// \brief Returns a new DataSetBase containing a deep copy of contents
+ ///
+ /// \returns raw pointer to the copy
+ ///
+ /// \note NOTE(review): a raw pointer is returned; presumably the caller
+ /// takes ownership of the new object - confirm against the
+ /// implementation.
+ ///
+ DataSetBase* DeepCopy(void) const;
+
+public:
+ /// \brief Merges dataset contents.
+ ///
+ /// Adds contents of \p other to this dataset object
+ ///
+ /// \param[in] other some other dataset to add to this one
+ /// \returns reference to this dataset object
+ ///
+ DataSetBase& operator+=(const DataSetBase& other);
+
+public:
+ /// \brief Fetches the dataset's ExternalResources element.
+ ///
+ /// \returns const reference to child element
+ /// \throws std::runtime_error if element does not exist
+ ///
+ const PacBio::BAM::ExternalResources& ExternalResources(void) const;
+
+ /// \brief Fetches the dataset's Filters element.
+ ///
+ /// \returns const reference to child element
+ ///
+ /// \note NOTE(review): unlike ExternalResources() const, no \\throws is
+ /// documented here; behavior when the element is absent should be
+ /// confirmed against the implementation.
+ ///
+ const PacBio::BAM::Filters& Filters(void) const;
+
+ /// \brief Fetches the dataset's DataSetMetadata element.
+ ///
+ /// \returns const reference to child element
+ ///
+ /// \note NOTE(review): behavior when the element is absent is not
+ /// documented - confirm against the implementation.
+ ///
+ const PacBio::BAM::DataSetMetadata& Metadata(void) const;
+
+ /// \brief Fetches the dataset's DataSets element.
+ ///
+ /// \returns const reference to child element
+ ///
+ /// \note NOTE(review): behavior when the element is absent is not
+ /// documented - confirm against the implementation.
+ ///
+ const PacBio::BAM::SubDataSets& SubDataSets(void) const;
+
+public:
+ /// \brief Access this dataset's namespace info.
+ ///
+ /// \returns const reference to dataset's NamespaceRegistry
+ ///
+ const NamespaceRegistry& Namespaces(void) const;
+
+public:
+ /// \brief Fetches the dataset's ExternalResources element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
+ PacBio::BAM::ExternalResources& ExternalResources(void);
+
+ /// \brief Fetches the dataset's Filters element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
+ PacBio::BAM::Filters& Filters(void);
+
+ /// \brief Fetches the dataset's DataSetMetadata element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
+ PacBio::BAM::DataSetMetadata& Metadata(void);
+
+ /// \brief Fetches the dataset's DataSets element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \returns non-const reference to child element
+ ///
+ PacBio::BAM::SubDataSets& SubDataSets(void);
+
+public:
+ /// \brief Sets this dataset's ExternalResources element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] resources new value for the element
+ /// \returns reference to this dataset object
+ ///
+ DataSetBase& ExternalResources(const PacBio::BAM::ExternalResources& resources);
+
+ /// \brief Sets this dataset's Filters element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] filters new value for the element
+ /// \returns reference to this dataset object
+ ///
+ DataSetBase& Filters(const PacBio::BAM::Filters& filters);
+
+ /// \brief Sets this dataset's DataSetMetadata element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] metadata new value for the element
+ /// \returns reference to this dataset object
+ ///
+ DataSetBase& Metadata(const PacBio::BAM::DataSetMetadata& metadata);
+
+ /// \brief Sets this dataset's DataSets element.
+ ///
+ /// This element will be created if it does not yet exist.
+ ///
+ /// \param[in] subdatasets new value for the element
+ /// \returns reference to this dataset object
+ ///
+ DataSetBase& SubDataSets(const PacBio::BAM::SubDataSets& subdatasets);
+
+public:
+ /// \brief Access this dataset's namespace info.
+ ///
+ /// \returns non-const reference to dataset's NamespaceRegistry
+ ///
+ NamespaceRegistry& Namespaces(void);
+
+private:
+ // Per-dataset namespace registry, exposed through Namespaces().
+ NamespaceRegistry registry_;
+};
+
+/// \brief The AlignmentSet class represents an %AlignmentSet root element in
+/// DataSetXML.
+///
+/// Declares only a default constructor; all other members are inherited from
+/// DataSetBase.
+///
+class PBBAM_EXPORT AlignmentSet : public DataSetBase
+{
+public:
+ /// \brief Creates an empty AlignmentSet dataset.
+ AlignmentSet(void);
+};
+
+/// \brief The BarcodeSet class represents a %BarcodeSet root element in
+/// DataSetXML.
+///
+/// Declares only a default constructor; all other members are inherited from
+/// DataSetBase.
+///
+class PBBAM_EXPORT BarcodeSet : public DataSetBase
+{
+public:
+ /// \brief Creates an empty BarcodeSet dataset.
+ BarcodeSet(void);
+};
+
+/// \brief The ConsensusAlignmentSet class represents a %ConsensusAlignmentSet
+/// root element in DataSetXML.
+///
+/// Declares only a default constructor; all other members are inherited from
+/// DataSetBase.
+///
+class PBBAM_EXPORT ConsensusAlignmentSet : public DataSetBase
+{
+public:
+ /// \brief Creates an empty ConsensusAlignmentSet dataset.
+ ConsensusAlignmentSet(void);
+};
+
+/// \brief The ConsensusReadSet class represents a %ConsensusReadSet root
+/// element in DataSetXML.
+///
+/// Declares only a default constructor; all other members are inherited from
+/// DataSetBase.
+///
+class PBBAM_EXPORT ConsensusReadSet : public DataSetBase
+{
+public:
+ /// \brief Creates an empty ConsensusReadSet dataset.
+ ConsensusReadSet(void);
+};
+
+/// \brief The ContigSet class represents a %ContigSet root element in
+/// DataSetXML.
+///
+/// Declares only a default constructor; all other members are inherited from
+/// DataSetBase.
+///
+class PBBAM_EXPORT ContigSet : public DataSetBase
+{
+public:
+ /// \brief Creates an empty ContigSet dataset.
+ ContigSet(void);
+};
+
+/// \brief The HdfSubreadSet class represents a %HdfSubreadSet root element in
+/// DataSetXML.
+///
+/// Declares only a default constructor; all other members are inherited from
+/// DataSetBase.
+///
+class PBBAM_EXPORT HdfSubreadSet : public DataSetBase
+{
+public:
+ /// \brief Creates an empty HdfSubreadSet dataset.
+ HdfSubreadSet(void);
+};
+
+/// \brief The ReferenceSet class represents a %ReferenceSet root element in
+/// DataSetXML.
+///
+/// Declares only a default constructor; all other members are inherited from
+/// DataSetBase.
+///
+class PBBAM_EXPORT ReferenceSet : public DataSetBase
+{
+public:
+ /// \brief Creates an empty ReferenceSet dataset.
+ ReferenceSet(void);
+};
+
+/// \brief The SubDataSets class represents a %DataSets list element in
+/// DataSetXML.
+///
+/// The SubDataSets element is essentially a list of DataSets. Entries are
+/// handled through the DataSetBase type.
+///
+class PBBAM_EXPORT SubDataSets : public internal::DataSetListElement<DataSetBase>
+{
+public:
+ /// \brief Creates an empty list of sub-datasets.
+ SubDataSets(void);
+
+public:
+ /// \brief Adds \p other sub-dataset to this list.
+ ///
+ /// \param[in] other single sub-dataset to add
+ ///
+ SubDataSets& operator+=(const DataSetBase& other); // single
+
+ /// \brief Adds \p other sub-dataset list to this list.
+ ///
+ /// \param[in] other list of sub-datasets to add
+ ///
+ SubDataSets& operator+=(const SubDataSets& other); // list
+
+public:
+ /// \brief Adds a sub-dataset to this list.
+ ///
+ /// \param[in] subdataset sub-dataset to add
+ ///
+ void Add(const DataSetBase& subdataset);
+
+ /// \brief Removes a sub-dataset from this list.
+ ///
+ /// \param[in] subdataset sub-dataset to remove
+ ///
+ void Remove(const DataSetBase& subdataset);
+};
+
+/// \brief The SubreadSet class represents a %SubreadSet root element in
+/// DataSetXML.
+///
+/// Declares only a default constructor; all other members are inherited from
+/// DataSetBase.
+///
+class PBBAM_EXPORT SubreadSet : public DataSetBase
+{
+public:
+ /// \brief Creates an empty SubreadSet dataset.
+ SubreadSet(void);
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "internal/DataSetTypes.inl"
+
+#endif // DATASETTYPES_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file DataSetXsd.h
+/// \brief Defines the XSD- and namespace-related classes for DataSetXML.
+//
+// Author: Derek Barnett
+
+#ifndef DATASETXSD_H
+#define DATASETXSD_H
+
+#include "pbbam/Config.h"
+#include <map>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The XsdType enum defines the supported XSD namespaces.
+///
+/// Values of this enum are used as the lookup key in NamespaceRegistry.
+/// NONE is the first enumerator (underlying value 0); the remaining
+/// enumerators are listed alphabetically.
+///
+enum class XsdType
+{
+ NONE
+
+ , AUTOMATION_CONSTRAINTS
+ , BASE_DATA_MODEL
+ , COLLECTION_METADATA
+ , COMMON_MESSAGES
+ , DATA_MODEL
+ , DATA_STORE
+ , DATASETS
+ , DECL_DATA
+ , PART_NUMBERS
+ , PRIMARY_METRICS
+ , REAGENT_KIT
+ , RIGHTS_AND_ROLES
+ , SAMPLE_INFO
+ , SEEDING_DATA
+};
+
+/// \brief The NamespaceInfo class provides XML namespace info (prefix & URI).
+///
+/// Instances are read-only after construction: only const getters are
+/// declared.
+///
+class PBBAM_EXPORT NamespaceInfo
+{
+public:
+ /// \brief Creates an empty entry.
+ ///
+ /// This constructor only exists for STL container compatibility.
+ ///
+ NamespaceInfo(void);
+
+ /// \brief Creates a valid info entry.
+ ///
+ /// \param[in] name namespace name (i.e. prefix)
+ /// \param[in] uri namespace URI
+ ///
+ NamespaceInfo(const std::string& name,
+ const std::string& uri);
+
+public:
+ /// \brief Fetches namespace name (i.e. prefix)
+ ///
+ /// \returns const reference to the stored name
+ ///
+ const std::string& Name(void) const { return name_; }
+
+ /// \brief Fetches namespace URI.
+ ///
+ /// \returns const reference to the stored URI
+ ///
+ const std::string& Uri(void) const { return uri_; }
+
+private:
+ std::string name_; // namespace prefix, returned by Name()
+ std::string uri_; // namespace URI, returned by Uri()
+};
+
+/// \brief The NamespaceRegistry class provides a per-dataset registry of XML
+/// namespace information.
+///
+/// This is used to format XML output - properly prefixing element labels with
+/// namespace as appropriate.
+///
+/// The registry maps each XsdType to a NamespaceInfo and additionally tracks
+/// one XsdType as the dataset's default.
+///
+class PBBAM_EXPORT NamespaceRegistry
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ NamespaceRegistry(void);
+ NamespaceRegistry(const NamespaceRegistry& other);
+ NamespaceRegistry(NamespaceRegistry&& other);
+ NamespaceRegistry& operator=(const NamespaceRegistry& other);
+ NamespaceRegistry& operator=(NamespaceRegistry&& other);
+ ~NamespaceRegistry(void);
+
+ /// \}
+
+public:
+ /// \name Registry Access
+ /// \{
+
+ /// \brief Fetches namespace info for the dataset's default XSD type.
+ ///
+ /// \returns const reference to the namespace info
+ ///
+ const NamespaceInfo& DefaultNamespace(void) const;
+
+ /// \brief Fetches dataset's default XSD type.
+ XsdType DefaultXsd(void) const;
+
+ /// \brief Fetches namespace info for the requested XSD type.
+ ///
+ /// \param[in] xsd XSD type to look up
+ /// \returns const reference to the namespace info
+ ///
+ const NamespaceInfo& Namespace(const XsdType& xsd) const;
+
+ /// \brief Registers namespace info for a particular XSD type.
+ ///
+ /// \param[in] xsd XSD type to register info for
+ /// \param[in] namespaceInfo namespace name & URI to associate with \p xsd
+ ///
+ void Register(const XsdType& xsd, const NamespaceInfo& namespaceInfo);
+
+ /// \brief Updates dataset's default XSD type.
+ ///
+ /// \param[in] xsd new default XSD type
+ ///
+ void SetDefaultXsd(const XsdType& xsd);
+
+ /// \brief Fetches the XSD type for \p elementLabel.
+ ///
+ /// \param[in] elementLabel XML element label to look up
+ ///
+ XsdType XsdForElement(const std::string& elementLabel) const;
+
+ /// \brief Fetches the XSD type for a particular URI.
+ ///
+ /// \param[in] uri namespace URI to look up
+ ///
+ XsdType XsdForUri(const std::string& uri) const;
+
+ /// \}
+
+private:
+ std::map<XsdType, NamespaceInfo> data_; // namespace info keyed by XSD type
+ XsdType defaultXsdType_; // the dataset's default XSD type
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // DATASETXSD_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file EntireFileQuery.h
+/// \brief Defines the EntireFileQuery class.
+//
+// Author: Derek Barnett
+
+#ifndef ENTIREFILEQUERY_H
+#define ENTIREFILEQUERY_H
+
+#include "pbbam/internal/QueryBase.h"
+#include <memory>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The EntireFileQuery class provides iterable access to a DataSet's
+/// %BAM records, reading through the entire contents of each file.
+///
+/// Input files will be accessed in the order listed in the DataSet.
+///
+/// \include code/EntireFileQuery.txt
+///
+/// Iteration is not limited to only 'const' records. The files themselves will
+/// not be affected, but individual records may be modified if needed.
+///
+/// \include code/EntireFileQuery_NonConst.txt
+///
+/// \note DataSets can be implicitly constructed from %BAM filenames as well.
+/// Thus a single %BAM file can be read through using the following:
+///
+/// \include code/EntireFileQuery_BamFilename.txt
+///
+class PBBAM_EXPORT EntireFileQuery : public internal::IQuery
+{
+public:
+ /// \brief Creates a new EntireFileQuery, reading through the entire
+ /// contents of a dataset.
+ ///
+ /// \param[in] dataset input data source(s)
+ /// \throws std::runtime_error on failure to open/read underlying %BAM
+ /// files.
+ ///
+ EntireFileQuery(const PacBio::BAM::DataSet& dataset);
+ ~EntireFileQuery(void);
+
+public:
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ ///
+ /// \param[in,out] r record object passed by non-const reference
+ ///
+ /// \note NOTE(review): presumably returns true when a record was read
+ /// into \p r and false once the input is exhausted - confirm
+ /// against the implementation.
+ ///
+ bool GetNext(BamRecord& r);
+
+private:
+ // Implementation type is only forward-declared in this header; its
+ // definition lives outside it.
+ struct EntireFileQueryPrivate;
+ std::unique_ptr<EntireFileQueryPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // ENTIREFILEQUERY_H
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file FastaReader.h
+/// \brief Defines the FastaReader class.
+//
+// Author: Derek Barnett
+
+#ifndef FASTAREADER_H
+#define FASTAREADER_H
+
+#include "pbbam/FastaSequence.h"
+#include <memory>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { struct FastaReaderPrivate; }
+
+///
+/// \brief The FastaReader provides sequential access to FASTA records.
+///
+/// Instances are movable but not copyable.
+///
+// NOTE(review): unlike the other public classes in this library's headers,
+// this class is not marked PBBAM_EXPORT - confirm whether that is intended.
+//
+class FastaReader
+{
+public:
+ ///
+ /// \brief Reads all FASTA sequences from a file
+ ///
+ /// \param[in] fn FASTA filename
+ /// \return vector of FastaSequence results
+ ///
+ static std::vector<FastaSequence> ReadAll(const std::string& fn);
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates a reader on the FASTA file \p fn.
+ ///
+ /// \param[in] fn FASTA filename
+ ///
+ explicit FastaReader(const std::string& fn);
+ FastaReader(FastaReader&& other);
+ FastaReader& operator=(FastaReader&& other);
+ ~FastaReader(void);
+
+ // copy is disabled
+ FastaReader(const FastaReader&) = delete;
+ FastaReader& operator=(const FastaReader&) = delete;
+
+ /// \}
+
+public:
+ /// \name Sequence Access
+ /// \{
+
+ ///
+ /// \brief Fetches the next FASTA record, for use as a loop condition.
+ ///
+ /// \code{.cpp}
+ ///
+ /// FastaReader reader{ fn };
+ /// FastaSequence f;
+ /// while (reader.GetNext(f)) {
+ /// // do stuff with f
+ /// }
+ /// \endcode
+ ///
+ /// \param[out] record destination for the next FASTA record
+ /// \return success/failure
+ ///
+ bool GetNext(FastaSequence& record);
+
+ /// \}
+
+private:
+ // Implementation type is only forward-declared in this header.
+ std::unique_ptr<internal::FastaReaderPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // FASTAREADER_H
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file FastaSequence.h
+/// \brief Defines the FastaSequence class.
+//
+// Author: Derek Barnett
+
+#ifndef FASTASEQUENCE_H
+#define FASTASEQUENCE_H
+
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+///
+/// \brief The FastaSequence class represents a FASTA record (name & bases).
+///
+class FastaSequence
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates a FASTA record from the given name and bases.
+ ///
+ /// \param[in] name sequence name (taken by value)
+ /// \param[in] bases sequence bases (taken by value)
+ ///
+ explicit FastaSequence(std::string name, std::string bases);
+
+ FastaSequence(void) = default;
+ FastaSequence(const FastaSequence&) = default;
+ FastaSequence(FastaSequence&&) = default;
+ FastaSequence& operator=(const FastaSequence&) = default;
+ FastaSequence& operator=(FastaSequence&&) = default;
+ ~FastaSequence(void) = default;
+
+ /// \}
+
+public:
+ /// \name Attributes
+ /// \{
+
+ /// \brief Fetches the name of this FASTA record.
+ ///
+ /// \returns the record's name, as a copy (returned by value)
+ ///
+ std::string Name(void) const;
+
+ /// \brief Fetches the bases of this FASTA record.
+ ///
+ /// \returns the record's bases, as a copy (returned by value)
+ ///
+ std::string Bases(void) const;
+
+ /// \}
+
+private:
+ std::string name_;
+ std::string bases_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "internal/FastaSequence.inl"
+
+#endif // FASTASEQUENCE_H
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file FrameEncodingType.h
+/// \brief Defines the FrameEncodingType enum.
+//
+// Author: Derek Barnett
+
+#ifndef FRAMEENCODINGTYPE_H
+#define FRAMEENCODINGTYPE_H
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief This enum defines the possible encoding modes used in Frames data
+/// (e.g. BamRecord::IPD or BamRecord::PulseWidth).
+///
+/// The LOSSY mode is the default in production output; the LOSSLESS mode
+/// is used primarily for internal applications.
+///
+/// \sa https://github.com/PacificBiosciences/PacBioFileFormats/blob/3.0/BAM.rst
+/// for more information on pulse frame encoding schemes.
+///
+enum class FrameEncodingType
+{
+ LOSSY ///< 8-bit compression (using CodecV1) of frame data
+ , LOSSLESS ///< 16-bit native frame data
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // FRAMEENCODINGTYPE_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Frames.h
+/// \brief Defines the Frames class.
+//
+// Author: Derek Barnett
+
+#ifndef FRAMES_H
+#define FRAMES_H
+
+#include "pbbam/Config.h"
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The Frames class represents pulse frame data.
+///
+/// Frame data may be stored either as raw, 16-bit values or
+/// using a lossy, 8-bit compression scheme.
+///
+/// This class stores the raw (16-bit) data and converts to/from the encoded form.
+///
+class PBBAM_EXPORT Frames
+{
+public:
+ /// \name Conversion Methods
+ /// \{
+
+ /// \brief Constructs a Frames object from encoded (lossy, 8-bit) data.
+ ///
+ /// \note This method should probably not be needed often by client code
+ /// working with frame data. It exists primarily for (internal)
+ /// parsing & interpretation of the %BAM file contents. The method is
+ /// available, though, should the conversion operation be needed.
+ ///
+ /// \param[in] codedData encoded data
+ /// \returns Frames object
+ ///
+ static Frames Decode(const std::vector<uint8_t>& codedData);
+
+ /// \brief Creates encoded, compressed frame data from raw input data.
+ ///
+ /// \param[in] frames raw frame data
+ /// \returns lossy, 8-bit encoded frame data
+ ///
+ static std::vector<uint8_t> Encode(const std::vector<uint16_t>& frames);
+
+ /// \}
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ Frames(void);
+ Frames(const std::vector<uint16_t>& frames);
+ Frames(std::vector<uint16_t>&& frames);
+ Frames(const Frames& other);
+ Frames(Frames&& other);
+ Frames& operator=(const Frames& other);
+ Frames& operator=(Frames&& other);
+ ~Frames(void);
+
+ /// \}
+
+public:
+ /// \name Access Data
+ /// \{
+
+ /// \returns Expanded (not encoded) frame data: mutable via DataRaw(), read-only via Data()
+ std::vector<uint16_t>& DataRaw(void);
+ const std::vector<uint16_t>& Data(void) const;
+
+ /// \}
+
+public:
+ /// \name Conversion Methods
+ /// \{
+
+ /// \returns Frame data in (lossy, 8-bit) encoded form.
+ std::vector<uint8_t> Encode(void) const;
+
+ /// \}
+
+public:
+ /// \name Comparison Operators
+ /// \{
+
+ bool operator==(const Frames& other) const;
+ bool operator!=(const Frames& other) const;
+
+ /// \}
+
+public:
+ /// \name STL Compatibility
+ /// \{
+
+ /// \returns A const_iterator to the beginning of the sequence.
+ std::vector<uint16_t>::const_iterator cbegin(void) const;
+
+ /// \returns A const_iterator to the element past the end of the sequence.
+ std::vector<uint16_t>::const_iterator cend(void) const;
+
+ /// \returns A const_iterator to the beginning of the sequence.
+ std::vector<uint16_t>::const_iterator begin(void) const;
+
+ /// \returns A const_iterator to the element past the end of the sequence.
+ std::vector<uint16_t>::const_iterator end(void) const;
+
+ /// \returns An iterator to the beginning of the sequence.
+ std::vector<uint16_t>::iterator begin(void);
+
+ /// \returns An iterator to the element past the end of the sequence.
+ std::vector<uint16_t>::iterator end(void);
+
+ /// \returns The number of frame data points.
+ size_t size(void) const;
+
+ /// \returns True if the container is empty, false otherwise.
+ bool empty(void) const;
+
+ /// \}
+
+public:
+ /// \name Access Data
+ /// \{
+
+ /// \brief Sets this object's frame data (copying from \p frames).
+ ///
+ /// \param[in] frames data in expanded (not encoded) form
+ /// \returns reference to this object
+ ///
+ Frames& Data(const std::vector<uint16_t>& frames);
+
+ /// \brief Sets this object's frame data (rvalue overload).
+ ///
+ /// \param[in] frames data in expanded (not encoded) form
+ /// \returns reference to this object
+ ///
+ Frames& Data(std::vector<uint16_t>&& frames);
+
+ /// \}
+
+private:
+ std::vector<uint16_t> data_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/Frames.inl"
+
+#endif // FRAMES_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file GenomicInterval.h
+/// \brief Defines the GenomicInterval class.
+//
+// Author: Derek Barnett
+
+#ifndef GENOMICINTERVAL_H
+#define GENOMICINTERVAL_H
+
+#include "pbbam/Config.h"
+#include "pbbam/Interval.h"
+#include "pbbam/Position.h"
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The GenomicInterval class represents a genomic interval (reference
+/// name and 0-based coordinates).
+///
+class PBBAM_EXPORT GenomicInterval
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty genomic interval
+ GenomicInterval(void);
+
+ /// \brief Creates a genomic interval on sequence with \p name, using range:
+ /// [\p start, \p stop)
+ GenomicInterval(const std::string& name,
+ const Position& start,
+ const Position& stop);
+
+ /// \brief Creates a genomic interval, using REGION string
+ ///
+ /// "<ref>:<start>-<stop>" ("chr8:200-600")
+ ///
+ /// \note The htslib/samtools REGION string expects start positions to be
+ /// 1-based. However, throughout pbbam (including the rest of this
+ /// class), we stick to 0-based start coordinates. Thus, while the
+ /// syntax matches that of samtools, we are using a 0-based start
+ /// coordinate here.
+ ///
+ GenomicInterval(const std::string& zeroBasedRegionString);
+
+ GenomicInterval(const GenomicInterval& other);
+ GenomicInterval& operator=(const GenomicInterval& other);
+
+ ~GenomicInterval(void);
+
+ /// \}
+
+public:
+ /// \name Comparison Operators
+ /// \{
+
+ /// \returns true if same name & underlying interval
+ bool operator==(const GenomicInterval& other) const;
+
+ /// \returns true if either names or underlying intervals differ
+ bool operator!=(const GenomicInterval& other) const;
+
+ /// \}
+
+public:
+ /// \name Interval Operations
+ /// \{
+
+ /// \returns true if same name and underlying Interval::CoveredBy() other.
+ bool CoveredBy(const GenomicInterval& other) const;
+
+ /// \returns true if same name and underlying Interval::Covers() other.
+ bool Covers(const GenomicInterval& other) const;
+
+ /// \returns true if same name and underlying Interval::Intersects() other.
+ bool Intersects(const GenomicInterval& other) const;
+
+ /// \returns true if underlying Interval::IsValid(), and id/endpoints are
+ /// non-negative.
+ /// NOTE(review): this class stores a string name, not a numeric id - confirm the name check.
+ bool IsValid(void) const;
+
+ /// \returns length of underlying interval
+ size_t Length(void) const;
+
+ /// \}
+
+
+public:
+ /// \name Attributes
+ /// \{
+
+ /// \returns interval reference name
+ std::string Name(void) const;
+
+ /// \returns underlying Interval object
+ PacBio::BAM::Interval<Position> Interval(void) const;
+
+ /// \returns interval start coordinate
+ Position Start(void) const;
+
+ /// \returns interval stop coordinate
+ Position Stop(void) const;
+
+ /// \}
+
+public:
+ /// \name Attributes
+ /// \{
+
+ /// \brief Sets this interval's reference name.
+ ///
+ /// \param[in] name new reference name
+ /// \returns reference to this interval
+ ///
+ GenomicInterval& Name(const std::string& name);
+
+ /// \brief Sets the underlying Interval.
+ ///
+ /// \param[in] interval new underlying interval
+ /// \returns reference to this interval
+ ///
+ GenomicInterval& Interval(const PacBio::BAM::Interval<Position>& interval);
+
+ /// \brief Sets this interval's start coordinate.
+ ///
+ /// \param[in] start new start coordinate
+ /// \returns reference to this interval
+ ///
+ GenomicInterval& Start(const Position start);
+
+ /// \brief Sets this interval's stop coordinate.
+ ///
+ /// \param[in] stop new stop coordinate
+ /// \returns reference to this interval
+ ///
+ GenomicInterval& Stop(const Position stop);
+
+ /// \}
+
+private:
+ std::string name_;
+ PacBio::BAM::Interval<Position> interval_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/GenomicInterval.inl"
+
+#endif // GENOMICINTERVAL_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file GenomicIntervalQuery.h
+/// \brief Defines the GenomicIntervalQuery class.
+//
+// Author: Derek Barnett
+
+#ifndef GENOMICINTERVALQUERY_H
+#define GENOMICINTERVALQUERY_H
+
+#include "pbbam/GenomicInterval.h"
+#include "pbbam/internal/QueryBase.h"
+#include <memory>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The GenomicIntervalQuery class provides iterable access to a
+/// DataSet's %BAM records, limiting results to those overlapping a
+/// GenomicInterval.
+///
+/// Example:
+/// \include code/GenomicIntervalQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".bai" index file.
+/// Use BamFile::EnsureStandardIndexExists before creating the query if
+/// one may not be present.
+///
+class PBBAM_EXPORT GenomicIntervalQuery : public internal::IQuery
+{
+public:
+
+ /// \brief Constructs a new GenomicIntervalQuery, limiting record results to
+ /// only those overlapping a GenomicInterval.
+ ///
+ /// \param[in] interval genomic interval of interest
+ /// \param[in] dataset input data source(s)
+ ///
+ /// \throws std::runtime_error on failure to open/read underlying %BAM or
+ /// BAI files.
+ ///
+ GenomicIntervalQuery(const GenomicInterval& interval,
+ const PacBio::BAM::DataSet& dataset);
+ ~GenomicIntervalQuery(void);
+
+public:
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ /// \param[out] r record passed by non-const reference (presumably filled with the next match - TODO confirm)
+ bool GetNext(BamRecord& r);
+
+public:
+ /// \brief Sets a new genomic interval.
+ ///
+ /// This allows the same dataset/query to be re-used over multiple regions of
+ /// interest:
+ ///
+ /// \include code/GenomicIntervalQuery_Reuse.txt
+ ///
+ /// \param[in] interval new genomic interval
+ /// \returns reference to this query
+ ///
+ GenomicIntervalQuery& Interval(const GenomicInterval& interval);
+
+ /// \returns Current genomic interval active on this query.
+ const GenomicInterval& Interval(void) const;
+
+private:
+ struct GenomicIntervalQueryPrivate;
+ std::unique_ptr<GenomicIntervalQueryPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // GENOMICINTERVALQUERY_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file IRecordWriter.h
+/// \brief Defines the IRecordWriter interface.
+//
+// Author: Derek Barnett
+
+#ifndef IRECORDWRITER_H
+#define IRECORDWRITER_H
+
+namespace PacBio {
+namespace BAM {
+
+class BamRecord;
+class BamRecordImpl;
+/// \brief Abstract interface for record writers (pure-virtual TryFlush() & Write()).
+class IRecordWriter
+{
+public:
+ virtual ~IRecordWriter(void);
+
+public:
+
+ /// \brief Try to flush any buffered data to file.
+ ///
+ /// \note The underlying implementation may not necessarily flush buffered
+ /// data immediately, especially in a multithreaded writer situation.
+ /// Let the writer go out of scope to fully ensure flushing.
+ ///
+ /// \throws std::runtime_error if flush fails
+ ///
+ virtual void TryFlush(void) =0;
+
+
+ /// \brief Write a record to the output %BAM file.
+ ///
+ /// \param[in] record BamRecord object
+ ///
+ /// \throws std::runtime_error on failure to write
+ ///
+ virtual void Write(const BamRecord& record) =0;
+
+ /// \brief Write a record to the output %BAM file.
+ ///
+ /// \param[in] recordImpl BamRecordImpl object
+ ///
+ /// \throws std::runtime_error on failure to write
+ ///
+ virtual void Write(const BamRecordImpl& recordImpl) =0;
+
+protected:
+ IRecordWriter(void);
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // IRECORDWRITER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file IndexedFastaReader.h
+/// \brief Defines the IndexedFastaReader class.
+//
+// Author: David Alexander
+
+#ifndef INDEXEDFASTAREADER_H
+#define INDEXEDFASTAREADER_H
+
+#include "pbbam/Orientation.h"
+#include "pbbam/Position.h"
+#include <htslib/faidx.h>
+#include <string>
+#include <iostream>
+#include <stdexcept>
+
+namespace PacBio {
+namespace BAM {
+
+class GenomicInterval;
+class BamRecord;
+
+/// \brief The IndexedFastaReader class provides random-access to FASTA file
+/// data.
+///
+class IndexedFastaReader {
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ IndexedFastaReader(void) = delete;
+ IndexedFastaReader(const std::string& filename);
+ IndexedFastaReader(const IndexedFastaReader& src);
+ IndexedFastaReader& operator=(const IndexedFastaReader& rhs);
+ ~IndexedFastaReader(void);
+
+ /// \}
+
+public:
+ /// \name Sequence Access
+ /// \{
+
+ /// \brief Fetches FASTA sequence for desired interval.
+ ///
+ /// \param[in] id reference sequence name
+ /// \param[in] begin start position
+ /// \param[in] end end position
+ ///
+ /// \returns sequence string at desired interval
+ ///
+ /// \throws std::runtime_error on failure to fetch sequence
+ ///
+ std::string Subsequence(const std::string& id,
+ Position begin,
+ Position end) const;
+
+ /// \brief Fetches FASTA sequence for desired interval.
+ ///
+ /// \param[in] interval desired interval
+ ///
+ /// \returns sequence string at desired interval
+ ///
+ /// \throws std::runtime_error on failure to fetch sequence
+ ///
+ std::string Subsequence(const GenomicInterval& interval) const;
+
+ /// \brief Fetches FASTA sequence for desired interval.
+ ///
+ /// \param[in] htslibRegion htslib/samtools-formatted REGION string
+ /// representing the desired interval
+ ///
+ /// \returns sequence string at desired interval
+ ///
+ /// \throws std::runtime_error on failure to fetch sequence
+ ///
+ std::string Subsequence(const char* htslibRegion) const;
+
+ /// \brief Fetches FASTA sequence corresponding to a BamRecord, oriented and
+ /// gapped as requested.
+ ///
+ /// For example, "native" orientation and "gapped" will return the reference
+ /// sequence with gaps inserted, as would align against the read in "native"
+ /// orientation.
+ ///
+ /// \param[in] bamRecord input BamRecord to derive interval/CIGAR
+ /// data
+ /// \param[in] orientation orientation of output
+ /// \param[in] gapped if true, gaps/padding will be inserted, per
+ /// record's CIGAR info.
+ /// \param[in] exciseSoftClips if true, any soft-clipped positions will be
+ /// removed from query ends
+ ///
+ /// \returns sequence string over the record's interval
+ ///
+ /// \throws std::runtime_error on failure to fetch sequence
+ ///
+ std::string ReferenceSubsequence(const BamRecord& bamRecord,
+ const Orientation orientation=Orientation::GENOMIC,
+ const bool gapped=false,
+ const bool exciseSoftClips=false) const;
+
+ /// \}
+
+public:
+ /// \name File Attributes
+ /// \{
+
+ /// \returns true if FASTA file contains a sequence matching \p name
+ bool HasSequence(const std::string& name) const;
+
+ /// \returns number of sequences stored in FASTA file
+ int NumSequences(void) const;
+
+ /// \returns length of the FASTA sequence matching \p name
+ ///
+ /// \throws std::runtime_error if length could not be determined
+ ///
+ int SequenceLength(const std::string& name) const;
+
+ /// \}
+
+private:
+ std::string filename_;
+ faidx_t* handle_; // htslib FASTA index handle (type from <htslib/faidx.h>)
+
+private:
+ void Close(void);
+ bool Open(const std::string& filename);
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // INDEXEDFASTAREADER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Interval.h
+/// \brief Defines the Interval class.
+//
+// Author: Derek Barnett
+
+#ifndef INTERVAL_H
+#define INTERVAL_H
+
+#include "pbbam/Config.h"
+#include <string>
+
+#define BOOST_ICL_USE_STATIC_BOUNDED_INTERVALS
+#include <boost/icl/discrete_interval.hpp>
+#include <boost/icl/interval_traits.hpp>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Represents a half-open (right-open) interval [start, stop)
+///
+/// \note This class is agnostic whether the values are 0-based or 1-based.
+/// Client code should primarily work with GenomicInterval, which does
+/// enforce this distinction.
+///
+template<typename T>
+class Interval
+{
+public:
+ typedef boost::icl::discrete_interval<T> interval_type;
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty interval [0,0)
+ Interval(void);
+
+ /// \brief Creates a 'singleton' interval [val,val+1)
+ Interval(const T val);
+
+ /// \brief Creates an interval from [start, stop)
+ Interval(const T start, const T stop);
+
+ Interval(const Interval<T>& other);
+
+ /// \}
+
+public:
+ /// \name Comparison Operators
+ /// \{
+
+ /// \returns true if both intervals share the same endpoints
+ bool operator==(const Interval<T>& other) const;
+
+ /// \returns true if either interval's endpoints differ
+ bool operator!=(const Interval<T>& other) const;
+
+ /// \}
+
+public:
+ /// \name Attributes
+ /// \{
+
+ /// \returns interval's start coordinate
+ T Start(void) const;
+
+ /// \brief Sets this interval's start coordinate.
+ ///
+ /// \param[in] start new start coordinate
+ /// \returns reference to this interval
+ ///
+ Interval<T>& Start(const T& start);
+
+ /// \returns interval's stop coordinate
+ T Stop(void) const;
+
+ /// \brief Sets this interval's stop coordinate.
+ ///
+ /// \param[in] stop new stop coordinate
+ /// \returns reference to this interval
+ ///
+ Interval<T>& Stop(const T& stop);
+
+ /// \}
+
+public:
+ /// \name Interval Operations
+
+ /// \returns true if this interval is fully covered by (or contained in) \p other
+ bool CoveredBy(const Interval<T>& other) const;
+
+ /// \returns true if this interval covers (or contains) \p other
+ bool Covers(const Interval<T>& other) const;
+
+ /// \returns true if intervals intersect
+ bool Intersects(const Interval<T>& other) const;
+
+ /// \returns true if interval is valid (e.g. start < stop)
+ bool IsValid(void) const;
+
+ /// \returns interval length
+ size_t Length(void) const;
+
+ /// \}
+
+private:
+ interval_type data_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/Interval.inl"
+
+#endif // INTERVAL_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file LocalContextFlags.h
+/// \brief Defines the LocalContextFlags enum & helper method(s).
+//
+// Author: Lance Hepler
+
+#ifndef LOCALCONTEXTFLAGS_H
+#define LOCALCONTEXTFLAGS_H
+
+#include "pbbam/Config.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The LocalContextFlags enum defines the flags that can be used
+/// to describe a subread's "local context", i.e. whether it is
+/// flanked by barcodes/adapters or its pass orientation.
+/// Each non-zero flag is a distinct power of two, so flags can be combined with bitwise-OR.
+enum LocalContextFlags : uint8_t // underlying storage is a single unsigned byte
+{
+ NO_LOCAL_CONTEXT = 0, ///< No context information available (no bits set)
+ ADAPTER_BEFORE = 1, ///< Adapter precedes subread
+ ADAPTER_AFTER = 2, ///< Adapter follows subread
+ BARCODE_BEFORE = 4, ///< Barcode precedes subread
+ BARCODE_AFTER = 8, ///< Barcode follows subread
+ FORWARD_PASS = 16, ///< Subread's orientation is 'forward pass'
+ REVERSE_PASS = 32 ///< Subread's orientation is 'reverse pass'
+};
+
+
+/// \returns a LocalContextFlags value holding the bitwise-OR of \p lhs and
+/// \p rhs, i.e. every flag bit that is set in either operand.
+// No 'inline' keyword is needed in this header: constexpr functions are implicitly inline.
+constexpr LocalContextFlags operator|(const LocalContextFlags lhs, const LocalContextFlags rhs)
+{
+ return static_cast<LocalContextFlags>(static_cast<int>(lhs) | static_cast<int>(rhs)); // OR the integer values, then convert back to the enum type
+}
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // LOCALCONTEXTFLAGS_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file MD5.h
+/// \brief Defines basic MD5 hash utilities
+//
+// Author: Brett Bowman
+
+#ifndef MD5_H
+#define MD5_H
+
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Computes the MD5 hash of the string \p str.
+/// \returns the hash as a 32-digit hexadecimal string
+std::string MD5Hash(const std::string& str);
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // MD5_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Orientation.h
+/// \brief Defines the Orientation enum.
+//
+// Author: Derek Barnett
+
+#ifndef ORIENTATION_H
+#define ORIENTATION_H
+
+#include "pbbam/Config.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief This enum defines the orientations recognized by BamRecord, for
+/// presenting "per-base" data.
+///
+/// Orientation::NATIVE indicates that data should be presented in the subread's
+/// original form.
+///
+/// Orientation::GENOMIC indicates that data should be presented relative to
+/// the genomic forward strand. This means that data will be reversed (or
+/// reverse-complemented) if the subread was aligned to the reverse strand.
+///
+enum class Orientation
+{
+ NATIVE ///< Present data in 'raw' original orientation, regardless of aligned Strand
+ , GENOMIC ///< Present data in aligned orientation, always relative to Strand::FORWARD.
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // ORIENTATION_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiBasicTypes.h
+/// \brief Defines the basic data structures used in PBI lookups.
+//
+// Author: Derek Barnett
+
+#ifndef PBIBASICTYPES_H
+#define PBIBASICTYPES_H
+
+#include "pbbam/Compare.h"
+#include "pbbam/Config.h"
+#include <deque>
+#include <utility>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The IndexResultBlock class represents a contiguous group of records
+/// returned from a PBI lookup.
+///
+/// Contiguous reads that satisfy a PBI lookup query will be merged down into a
+/// single block. This helps to minimize the number of seeks in subsequent read
+/// operations.
+///
+/// A PBI-enabled reader or query can iterate over a list of IndexResultBlocks;
+/// for each block, seeking to the first record and then sequentially reading
+/// 'numReads' consecutive records before needing to seek again.
+///
+struct PBBAM_EXPORT IndexResultBlock
+{
+public:
+ IndexResultBlock(void); // not defined in this header body; presumably in PbiBasicTypes.inl - confirm initial field values there
+ IndexResultBlock(size_t idx, size_t numReads); // presumably stores idx/numReads into firstIndex_/numReads_ - confirm in the .inl
+
+public:
+ bool operator==(const IndexResultBlock& other) const; // equality comparison; which fields participate is decided by the out-of-view definition
+ bool operator!=(const IndexResultBlock& other) const; // inequality comparison; see note on operator==
+
+public:
+ size_t firstIndex_; ///< index of block's first record in BAM/PBI files (e.g. i-th record)
+ size_t numReads_; ///< number of reads in this block
+ int64_t virtualOffset_; ///< virtual offset of first record in this block
+};
+
+/// \brief container (std::deque) of PBI result blocks
+///
+typedef std::deque<IndexResultBlock> IndexResultBlocks;
+
+/// \brief container (std::vector) of raw PBI indices
+///
+/// This is the primary result of PbiFilter-associated classes. This raw list
+/// can participate in set operations (union, intersect) for compound filters,
+/// and then be merged down into IndexResultBlocks for actual data file
+/// random-access.
+///
+typedef std::vector<size_t> IndexList;
+
+/// \brief pair representing a range of PBI indices, where the interval
+/// is half-open: [first, second)
+///
+/// Used primarily by the PBI's CoordinateSortedData components.
+///
+/// \sa PbiReferenceEntry, PbiRawReferenceData, & ReferenceLookupData
+///
+typedef std::pair<size_t, size_t> IndexRange;
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/PbiBasicTypes.inl"
+
+#endif // PBIBASICTYPES_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiBuilder.h
+/// \brief Defines the PbiBuilder class.
+//
+// Author: Derek Barnett
+
+#ifndef PBIBUILDER_H
+#define PBIBUILDER_H
+
+#include "pbbam/Config.h"
+#include <memory>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+class BamRecord;
+class PbiRawData;
+
+namespace internal { class PbiBuilderPrivate; }
+
+/// \brief The PbiBuilder class constructs PBI index data from %BAM record data.
+///
+/// Records are added one-by-one. This allows for either whole-file indexing of
+/// existing %BAM files or for indexing "on-the-fly" alongside a %BAM file as it
+/// is generated.
+///
+/// For simple PBI creation from existing %BAM files, see PbiFile::CreateFrom.
+/// This is the recommended approach, unless finer control or additional
+/// processing is needed.
+///
+class PBBAM_EXPORT PbiBuilder
+{
+public:
+ /// \brief This enum allows you to control the compression level of the
+ /// output PBI file.
+ ///
+ /// Values are equivalent to zlib compression levels. See its documentation
+ /// for more details: http://www.zlib.net/manual.html
+ ///
+ enum CompressionLevel
+ {
+ CompressionLevel_0 = 0
+ , CompressionLevel_1 = 1
+ , CompressionLevel_2 = 2
+ , CompressionLevel_3 = 3
+ , CompressionLevel_4 = 4
+ , CompressionLevel_5 = 5
+ , CompressionLevel_6 = 6
+ , CompressionLevel_7 = 7
+ , CompressionLevel_8 = 8
+ , CompressionLevel_9 = 9
+
+ , DefaultCompression = -1 ///< -1 is zlib's "default compression" level
+ , NoCompression = CompressionLevel_0 ///< Synonym for level 0
+ , FastCompression = CompressionLevel_1 ///< Synonym for level 1
+ , BestCompression = CompressionLevel_9 ///< Synonym for level 9
+ };
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Initializes builder to write data to \p pbiFilename.
+ ///
+ /// \param[in] pbiFilename output filename
+ /// \param[in] compressionLevel zlib compression level
+ /// \param[in] numThreads number of threads for compression. If set to
+ /// 0, PbiBuilder will attempt to determine a
+ /// reasonable estimate. If set to 1, this will
+ /// force single-threaded execution. No checks
+ /// are made against an upper limit. Defaults to 4.
+ ///
+ /// \throws std::runtime_error if PBI file cannot be opened for writing
+ ///
+ PbiBuilder(const std::string& pbiFilename,
+ const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression,
+ const size_t numThreads = 4);
+
+ /// \brief Initializes builder to write data to \p pbiFilename.
+ ///
+ /// Reference data-tracking structures will be initialized to expect
+ /// \p numReferenceSequences. (This is useful so that we can mark any
+ /// references that lack observed data appropriately).
+ ///
+ /// \param[in] pbiFilename output filename
+ /// \param[in] numReferenceSequences number of possible reference
+ /// sequences, e.g. BamHeader::NumSequences
+ /// \param[in] compressionLevel zlib compression level
+ /// \param[in] numThreads number of threads for compression. If set to
+ /// 0, PbiBuilder will attempt to determine a
+ /// reasonable estimate. If set to 1, this will
+ /// force single-threaded execution. No checks
+ /// are made against an upper limit. Defaults to 4.
+ ///
+ /// \throws std::runtime_error if PBI file cannot be opened for writing
+ ///
+ PbiBuilder(const std::string& pbiFilename,
+ const size_t numReferenceSequences,
+ const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression,
+ const size_t numThreads = 4);
+
+ /// \brief Initializes builder to write data to \p pbiFilename.
+ ///
+ /// Reference data-tracking structures will be initialized to expect
+ /// \p numReferenceSequences, but only if \p isCoordinateSorted is true.
+ ///
+ /// \param[in] pbiFilename output filename
+ /// \param[in] numReferenceSequences number of possible reference
+ /// sequences, e.g. BamHeader::NumSequences
+ /// \param[in] isCoordinateSorted if false, disables reference
+ /// sequence tracking
+ /// (BamHeader::SortOrder != "coordinate")
+ /// \param[in] compressionLevel zlib compression level
+ /// \param[in] numThreads number of threads for compression. If set to
+ /// 0, PbiBuilder will attempt to determine a
+ /// reasonable estimate. If set to 1, this will
+ /// force single-threaded execution. No checks
+ /// are made against an upper limit. Defaults to 4.
+ ///
+ /// \throws std::runtime_error if PBI file cannot be opened for writing
+ ///
+ PbiBuilder(const std::string& pbiFilename,
+ const size_t numReferenceSequences,
+ const bool isCoordinateSorted,
+ const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression,
+ const size_t numThreads = 4);
+
+ /// \brief Destroys builder, writing its data out to PBI file.
+ ///
+ /// On destruction, data summaries are calculated, raw data is written to
+ /// file, and the file handle is closed.
+ ///
+ ~PbiBuilder(void);
+
+ /// \}
+
+public:
+ /// \name Index Building
+ /// \{
+
+ /// \brief Adds \p record's data to underlying raw data structure.
+ ///
+ /// \note \p vOffset is a BGZF \b virtual offset into the %BAM file. To get
+ /// this value, you should use one of the following: \n
+ /// - while reading existing %BAM: BamReader::VirtualTell \n
+ /// - while writing new %BAM: BamWriter::Write(const BamRecord& record, int64_t* vOffset) \n
+ ///
+ ///
+ /// To build a PBI index while generating a %BAM file:
+ /// \include code/PbiBuilder_WithWriter.txt
+ ///
+ /// To build a PBI index from an existing %BAM file:
+ /// \include code/PbiBuilder_WithReader.txt
+ ///
+ /// \param[in] record input BamRecord to pull index data from
+ /// \param[in] vOffset \b virtual offset into %BAM file where record begins
+ ///
+ void AddRecord(const BamRecord& record, const int64_t vOffset);
+
+ /// \returns const reference to current raw index data. Mostly only used for
+ /// testing; shouldn't be needed by most client code.
+ ///
+ const PbiRawData& Index(void) const;
+
+ /// \}
+
+private:
+ std::unique_ptr<internal::PbiBuilderPrivate> d_; // uniquely-owned private implementation object (type is only forward-declared here)
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // PBIBUILDER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFile.h
+/// \brief Defines the PbiFile enums, typedefs, and methods.
+//
+// Author: Derek Barnett
+
+#ifndef PBIFILE_H
+#define PBIFILE_H
+
+#include "pbbam/Config.h"
+#include "pbbam/PbiBuilder.h"
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+class BamFile;
+
+namespace PbiFile
+{
+ /// \brief This enum describes the PBI file sections
+ /// (BASIC is zero and so contributes no bit; the optional sections are single-bit flags)
+ enum Section
+ {
+ BASIC = 0x0000 ///< BasicData (required)
+ , MAPPED = 0x0001 ///< MappedData (always optional)
+ , REFERENCE = 0x0002 ///< ReferenceData (always optional)
+ , BARCODE = 0x0004 ///< BarcodeData (always optional)
+
+ , ALL = BASIC | MAPPED | REFERENCE | BARCODE ///< Synonym for 'all sections' (evaluates to 0x0007)
+ };
+
+ /// \brief Helper typedef for storing multiple Section flags.
+ /// (a 16-bit unsigned value; presumably Section values OR-ed together - confirm against callers)
+ typedef uint16_t Sections;
+
+ /// \brief This enum describes the PBI file version.
+ enum VersionEnum
+ {
+ Version_3_0_0 = 0x030000 ///< v3.0.0
+ , Version_3_0_1 = 0x030001 ///< v3.0.1
+
+ , CurrentVersion = Version_3_0_1 ///< Synonym for the current PBI version.
+ };
+
+ /// \brief Builds PBI index data from the supplied %BAM file and writes a ".pbi" file.
+ ///
+ /// \param[in] bamFile source %BAM file
+ /// \param[in] compressionLevel compression level for the output PBI (see PbiBuilder::CompressionLevel)
+ /// \param[in] numThreads number of threads to use; defaults to 4 (presumably same meaning as for PbiBuilder - confirm)
+ /// \throws std::runtime_error if index file could not be created
+ ///
+ PBBAM_EXPORT void CreateFrom(const BamFile& bamFile,
+ const PbiBuilder::CompressionLevel compressionLevel = PbiBuilder::DefaultCompression,
+ const size_t numThreads = 4);
+
+} // namespace PbiFile
+} // namespace BAM
+} // namespace PacBio
+
+#endif // PBIFILE_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilter.h
+/// \brief Defines the PbiFilter class & helper 'concept'.
+//
+// Author: Derek Barnett
+
+#ifndef PBIFILTER_H
+#define PBIFILTER_H
+
+#include "pbbam/DataSet.h"
+#include "pbbam/PbiBasicTypes.h"
+#include "pbbam/PbiIndex.h"
+#include <boost/concept_check.hpp>
+#include <memory>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { struct PbiFilterPrivate; }
+
+/// \brief The PbiFilterConcept class provides compile-time enforcement of the
+/// required interface for PbiFilter's child filters.
+/// A type T satisfies it when the expression in the usage block below compiles.
+template<typename T>
+struct PbiFilterConcept
+{
+ BOOST_CONCEPT_USAGE(PbiFilterConcept)
+ {
+ // All PBI filters (built-in or client-defined) need only provide this
+ // interface:
+ //
+ // bool Accepts(const PbiRawData& index, const size_t row) const;
+ //
+ const PbiRawData index;
+ bool result = filter.Accepts(index, 0); // the required call: Accepts(index, row) with a result usable as bool
+ (void)result; // mark as used so the otherwise-unused local draws no warning
+ }
+
+private:
+ T filter; // instance of the candidate type, used only by the usage block above
+// PbiRawData index; (disabled member: the usage block declares its own local 'index' instead)
+};
+
+/// \brief The PbiFilter class provides a mechanism for performing PBI-enabled
+/// lookups.
+///
+/// The PbiFilter API is designed to be flexible, both built-in and for
+/// client-side customization. Built-in filters are provided for common queries,
+/// and client code can define and use custom filters as well. More complex
+/// filtering rules can be constructed via composition of simpler child filters.
+///
+/// Filter objects used as children of PbiFilter need only provide a method that
+/// matches this signature:
+///
+/// \include code/PbiFilter_Interface.txt
+///
+/// This requirement is enforced internally, using the PbiFilterConcept to
+/// require a compatible interface without requiring inheritance. This approach
+/// allows composition of heterogeneous filter types without worrying about a
+/// class hierarchy, pointer ownership across library/client boundaries, etc.
+///
+/// Thus a client application can define a custom filter if the built-in filters
+/// do not quite meet requirements. This filter may then be used in further
+/// PbiFilter composition, or directly to PbiFilterQuery
+///
+/// \include code/PbiFilter_CustomFilter.txt
+///
+/// As mentioned above, complex filters can be built up using multiple "child"
+/// filters. These complex filters are constructed by using either
+/// PbiFilter::Union (logical-OR over all direct children) or
+/// PbiFilter::Intersection (logical-AND over direct children).
+///
+/// \include code/PbiFilter_Composition.txt
+///
+class PBBAM_EXPORT PbiFilter
+{
+public:
+ enum CompositionType
+ {
+ INTERSECT
+ , UNION
+ };
+
+public:
+ /// \name Set Operations
+ /// \{
+
+ /// \brief Creates a PbiFilter that acts as intersection of the input
+ /// filters.
+ ///
+ /// A record must satisfy \b all of this filter's direct "child" filters.
+ ///
+ /// Equivalent to:
+ /// \include code/PbiFilter_Intersection_Copy.txt
+ ///
+ /// \param[in] filters vector of child filters
+ /// \returns composite filter
+ ///
+ static PbiFilter Intersection(const std::vector<PbiFilter>& filters);
+
+ /// \brief Creates a PbiFilter that acts as an intersection of the input
+ /// filters.
+ ///
+ /// A record must satisfy \b all of this filter's direct "child" filters.
+ ///
+ /// Equivalent to:
+ /// \include code/PbiFilter_Intersection_Move.txt
+ ///
+ /// \param[in] filters vector of child filters
+ /// \returns composite filter
+ ///
+ static PbiFilter Intersection(std::vector<PbiFilter>&& filters);
+
+ /// \brief Creates a PbiFilter that acts as a union of the input filters.
+ ///
+ /// A record must satisfy \b any of this filter's direct "child" filters.
+ ///
+ /// Equivalent to:
+ /// \include code/PbiFilter_Union_Copy.txt
+ ///
+ /// \param[in] filters vector of child filters
+ /// \returns composite filter
+ ///
+ static PbiFilter Union(const std::vector<PbiFilter>& filters);
+
+ /// \brief Creates a PbiFilter that acts as a union of the input filters.
+ ///
+ /// A record must satisfy \b any of this filter's direct "child" filters.
+ ///
+ /// Equivalent to:
+ /// \include code/PbiFilter_Union_Move.txt
+ ///
+ /// \param[in] filters vector of child filters
+ /// \returns composite filter
+ ///
+ static PbiFilter Union(std::vector<PbiFilter>&& filters);
+
+ /// \}
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates a PbiFilter from a %DataSet's described filters.
+ ///
+ /// A DataSet may contain a Filters element, itself a list of Filter
+ /// elements. Each Filter element will contain a Properties element, itself
+ /// a list of Property elements.
+ ///
+ /// The Filters hierarchy looks like this (in its XML output):
+ /// \verbinclude examples/plaintext/PbiFilter_DataSetXmlFilters.txt
+ ///
+ /// The resulting PbiFilter represents a union over all Filter elements,
+ /// with each Filter element requiring an intersection of all of its
+ /// Property criteria. These Property elements are mapped to built-in PBI
+ /// filter types. To use the labels in the example XML above, the filter
+ /// created here is equivalent to:
+ ///
+ /// (A && B) || (C && D)
+ ///
+ /// If a DataSet lacks any Filters, then an empty PbiFilter will be created
+ /// - corresponding to the dataset's entire contents.
+ ///
+ /// \param[in] dataset maybe containing filters
+ /// \returns composite filter
+ ///
+ static PbiFilter FromDataSet(const DataSet& dataset);
+
+public:
+
+ /// \brief Creates an empty filter.
+ ///
+ /// \note An empty filter will result in all records being returned, e.g.
+ /// for query iteration.
+ ///
+ /// \param[in] type composition type. Any additional child filters added to
+ /// this composite will be treated according to this type.
+ /// If INTERSECT, a record must match all child filters. If
+ /// UNION, a record must match any child filter.
+ ///
+ PbiFilter(const CompositionType type = INTERSECT);
+
+ /// \brief Creates a composite filter (of INTERSECT type) with an initial
+ /// child filter.
+ ///
+ /// \note T must satisfy PbiFilterConcept
+ ///
+ /// \param[in] filter initial child filter
+ ///
+ template<typename T>
+ PbiFilter(const T& filter);
+
+ /// \brief Creates a composite filter (of INTERSECT type) with an initial
+ /// child filter.
+ ///
+ /// \note T must satisfy PbiFilterConcept
+ ///
+ /// \param[in] filter initial child filter
+ ///
+ template<typename T>
+ PbiFilter(T&& filter);
+
+ /// \brief Creates a composite filter (of INTERSECT type) with a list of
+ /// initial child filters.
+ ///
+ /// \param[in] filters initial child filters
+ ///
+ PbiFilter(const std::vector<PbiFilter>& filters);
+
+ /// \brief Creates composite filter (of INTERSECT type) with a list of
+ /// initial child filters.
+ ///
+ /// \param[in] filters initial child filters
+ ///
+ PbiFilter(std::vector<PbiFilter>&& filters);
+
+ PbiFilter(const PbiFilter& other);
+ PbiFilter(PbiFilter&& other) noexcept;
+ PbiFilter& operator=(const PbiFilter& other);
+ PbiFilter& operator=(PbiFilter&& other) noexcept;
+ ~PbiFilter(void);
+
+ /// \}
+
+public:
+ /// \name Composition
+ /// \{
+
+ /// \brief Adds a new child filter of type T.
+ ///
+ /// \param[in] filter additional child filter. Type T must satisfy
+ /// PbiFilterConcept.
+ /// \returns reference to this filter
+ ///
+ template<typename T>
+ PbiFilter& Add(const T& filter);
+
+ /// \brief Adds a new child filter of type T.
+ ///
+ /// \param[in] filter additional child filter. Type T must satisfy
+ /// PbiFilterConcept.
+ /// \returns reference to this filter
+ ///
+ template<typename T>
+ PbiFilter& Add(T&& filter);
+
+ /// \brief Adds a new child filter.
+ ///
+ /// \param[in] filter additional child filter
+ /// \returns reference to this filter
+ ///
+ PbiFilter& Add(const PbiFilter& filter);
+
+ /// \brief Adds a new child filter.
+ ///
+ /// \param[in] filter additional child filter
+ /// \returns reference to this filter
+ ///
+ PbiFilter& Add(PbiFilter&& filter);
+
+ /// \brief Add child filters.
+ ///
+ /// \param[in] filters additional child filters
+ /// \returns reference to this filter
+ ///
+ PbiFilter& Add(const std::vector<PbiFilter>& filters);
+
+ /// \brief Add child filters.
+ ///
+ /// \param[in] filters additional child filters
+ /// \returns reference to this filter
+ ///
+ PbiFilter& Add(std::vector<PbiFilter>&& filters);
+
+ /// \returns true if this filter has no child filters.
+ bool IsEmpty(void) const;
+
+ /// \}
+
+public:
+ /// \name Lookup
+ /// \{
+
+ /// \brief Performs the PBI index lookup, combining the results of child
+ /// filters according to this filter's composition type.
+ ///
+ /// \param[in] idx PBI (raw) index object
+ /// \param[in] row record number in %BAM/PBI files
+ ///
+ /// \returns true if record at \p row passes this filter criteria,
+ /// including children (if any)
+ ///
+ bool Accepts(const BAM::PbiRawData& idx, const size_t row) const;
+
+ /// \}
+
+private:
+ std::unique_ptr<internal::PbiFilterPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/PbiFilter.inl"
+#include "pbbam/PbiFilterTypes.h"
+
+#endif // PBIFILTER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilterQuery.h
+/// \brief Defines the PbiFilterQuery class.
+//
+// Author: Derek Barnett
+
+#ifndef PBIFILTERQUERY_H
+#define PBIFILTERQUERY_H
+
+#include "pbbam/Config.h"
+#include "pbbam/PbiFilter.h"
+#include "pbbam/internal/QueryBase.h"
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The PbiFilterQuery class provides iterable access to a DataSet's
+/// %BAM records, limiting results to those matching filter criteria.
+///
+/// Example:
+/// \include code/PbiFilterQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+/// Use BamFile::EnsurePacBioIndexExists before creating the query if one
+/// may not be present.
+///
+class PBBAM_EXPORT PbiFilterQuery : public internal::IQuery
+{
+public:
+ /// \brief Creates a new PbiFilterQuery, limiting record results to only
+ /// those matching filter criteria.
+ ///
+ /// \param[in] filter filtering criteria
+ /// \param[in] dataset input data source(s)
+ ///
+ /// \throws std::runtime_error on failure to open/read underlying %BAM or
+ /// PBI files.
+ ///
+ PbiFilterQuery(const PbiFilter& filter, const DataSet& dataset);
+
+ /// \brief Destroys the query.
+ ///
+ /// Declared here without an inline definition; the private implementation
+ /// type held by d_ is only forward-declared in this header.
+ ///
+ ~PbiFilterQuery(void);
+
+public:
+
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ ///
+ /// \param[out] r record object, passed by non-const reference
+ ///
+ /// \returns NOTE(review): presumably true while a record was fetched into
+ /// \p r and false once the query is exhausted - confirm against
+ /// internal::IQuery.
+ ///
+ bool GetNext(BamRecord& r);
+
+private:
+ // Forward-declared implementation type; its definition is not part of this
+ // header.
+ struct PbiFilterQueryPrivate;
+ std::unique_ptr<PbiFilterQueryPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // PBIFILTERQUERY_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilterTypes.h
+/// \brief Defines the built-in PBI filters.
+//
+// Author: Derek Barnett
+
+#ifndef PBIFILTERTYPES_H
+#define PBIFILTERTYPES_H
+
+#include "pbbam/Compare.h"
+#include "pbbam/PbiFilter.h"
+#include "pbbam/PbiIndex.h"
+#include <boost/optional.hpp>
+#include <memory>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal {
+
+/// \internal
+///
+/// Provides basic container for value/compare-type pair.
+///
+/// Constructors are protected, so this type is only usable as a base class of
+/// a concrete filter.
+///
+/// \tparam T type of the value(s) compared against
+///
+template<typename T>
+struct FilterBase
+{
+public:
+ // Single value to compare on.
+ T value_;
+ // Optional list of values.
+ // NOTE(review): presumably set only by the vector ("whitelist")
+ // constructors - confirm in the .inl implementation.
+ boost::optional<std::vector<T> > multiValue_;
+ // Compare type applied to value_.
+ Compare::Type cmp_;
+protected:
+ // Single-value constructors (copying / rvalue overloads).
+ FilterBase(const T& value, const Compare::Type cmp);
+ FilterBase(T&& value, const Compare::Type cmp);
+ // Multi-value constructors (copying / rvalue overloads); no compare type is
+ // accepted here.
+ FilterBase(const std::vector<T>& values);
+ FilterBase(std::vector<T>&& values);
+protected:
+ // Comparison entry point available to derived filters.
+ // NOTE(review): presumably selects the single- or multi-value helper below
+ // depending on whether multiValue_ is set - confirm in the implementation.
+ bool CompareHelper(const T& lhs) const;
+private:
+ bool CompareSingleHelper(const T& lhs) const;
+ bool CompareMultiHelper(const T& lhs) const;
+};
+
+/// \internal
+///
+/// Dispatches the lookup to BarcodeLookupData
+///
+/// Constructors are protected and mirror those of FilterBase; only Accepts()
+/// is public.
+///
+/// \tparam T type of the value(s) compared against
+/// \tparam field BarcodeLookupData field this filter is parameterized on
+///
+template<typename T, BarcodeLookupData::Field field>
+struct BarcodeDataFilterBase : public FilterBase<T>
+{
+protected:
+ BarcodeDataFilterBase(const T& value, const Compare::Type cmp);
+ BarcodeDataFilterBase(T&& value, const Compare::Type cmp);
+ BarcodeDataFilterBase(const std::vector<T>& values);
+ BarcodeDataFilterBase(std::vector<T>&& values);
+public:
+ /// \param[in] idx PBI (raw) index object
+ /// \param[in] row record number in %BAM/PBI files
+ /// \returns true if the record at \p row passes this filter
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+/// \internal
+///
+/// Dispatches the lookup to BasicLookupData
+///
+/// Constructors are protected and mirror those of FilterBase; only Accepts()
+/// is public.
+///
+/// \tparam T type of the value(s) compared against
+/// \tparam field BasicLookupData field this filter is parameterized on
+///
+template<typename T, BasicLookupData::Field field>
+struct BasicDataFilterBase : public FilterBase<T>
+{
+protected:
+ BasicDataFilterBase(const T& value, const Compare::Type cmp);
+ BasicDataFilterBase(T&& value, const Compare::Type cmp);
+ BasicDataFilterBase(const std::vector<T>& values);
+ BasicDataFilterBase(std::vector<T>&& values);
+public:
+ /// \param[in] idx PBI (raw) index object
+ /// \param[in] row record number in %BAM/PBI files
+ /// \returns true if the record at \p row passes this filter
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+/// \internal
+///
+/// Dispatches the lookup to MappedLookupData
+///
+/// Constructors are protected and mirror those of FilterBase; only Accepts()
+/// is public.
+///
+/// \tparam T type of the value(s) compared against
+/// \tparam field MappedLookupData field this filter is parameterized on
+///
+template<typename T, MappedLookupData::Field field>
+struct MappedDataFilterBase : public FilterBase<T>
+{
+protected:
+ MappedDataFilterBase(const T& value, const Compare::Type cmp);
+ MappedDataFilterBase(T&& value, const Compare::Type cmp);
+ MappedDataFilterBase(const std::vector<T>& values);
+ MappedDataFilterBase(std::vector<T>&& values);
+public:
+ /// \param[in] idx PBI (raw) index object
+ /// \param[in] row record number in %BAM/PBI files
+ /// \returns true if the record at \p row passes this filter
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+} // namespace internal
+
+/// \brief The PbiAlignedEndFilter class provides a PbiFilter-compatible filter
+/// on aligned end.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::MappedDataFilterBase, parameterized on MappedLookupData::A_END.
+///
+/// Example: \include code/PbiAlignedEndFilter.txt
+///
+/// \sa BamRecord::AlignedEnd
+///
+struct PbiAlignedEndFilter
+ : public internal::MappedDataFilterBase<uint32_t, MappedLookupData::A_END>
+{
+public:
+ /// \brief Creates a filter on aligned end.
+ ///
+ /// \param[in] position value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiAlignedEndFilter(const uint32_t position,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiAlignedLengthFilter class provides a PbiFilter-compatible
+/// filter on aligned length.
+///
+/// NOTE(review): aligned length is presumably (aligned end - aligned start),
+/// per the references below - confirm in the implementation.
+///
+/// Example: \include code/PbiAlignedLengthFilter.txt
+///
+/// \sa BamRecord::AlignedEnd, BamRecord::AlignedStart
+///
+struct PbiAlignedLengthFilter : public internal::FilterBase<uint32_t>
+{
+public:
+ /// \brief Creates a filter on aligned length.
+ ///
+ /// \param[in] length value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiAlignedLengthFilter(const uint32_t length,
+ const Compare::Type cmp = Compare::EQUAL);
+
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ /// \param[in] idx PBI (raw) index object
+ /// \param[in] row record number in %BAM/PBI files
+ /// \returns true if the record at \p row passes this filter
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+/// \brief The PbiAlignedStartFilter class provides a PbiFilter-compatible
+/// filter on aligned start.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::MappedDataFilterBase, parameterized on MappedLookupData::A_START.
+///
+/// Example: \include code/PbiAlignedStartFilter.txt
+///
+/// \sa BamRecord::AlignedStart
+///
+struct PbiAlignedStartFilter
+ : public internal::MappedDataFilterBase<uint32_t, MappedLookupData::A_START>
+{
+public:
+ /// \brief Creates a filter on aligned start.
+ ///
+ /// \param[in] position value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiAlignedStartFilter(const uint32_t position,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiAlignedStrandFilter class provides a PbiFilter-compatible
+/// filter on aligned strand.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::MappedDataFilterBase, parameterized on MappedLookupData::STRAND.
+///
+/// Example: \include code/PbiAlignedStrandFilter.txt
+///
+/// \sa BamRecord::AlignedStrand
+///
+struct PbiAlignedStrandFilter
+ : public internal::MappedDataFilterBase<Strand, MappedLookupData::STRAND>
+{
+public:
+ /// \brief Creates a strand filter.
+ ///
+ /// \param[in] strand strand value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiAlignedStrandFilter(const Strand strand,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiBarcodeFilter class provides a PbiFilter-compatible filter on
+/// barcode ID.
+///
+/// Any record with this barcode ID (forward or reverse) will pass this filter.
+///
+/// Example: \include code/PbiBarcodeFilter.txt
+///
+/// \sa BamRecord::BarcodeForward, BamRecord::BarcodeReverse
+///
+struct PbiBarcodeFilter
+{
+public:
+ /// \brief Creates a single-value barcode filter.
+ ///
+ /// \param[in] barcode barcode ID to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiBarcodeFilter(const int16_t barcode,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a 'whitelisted' barcode filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly, in either bc_forward or bc_reverse.
+ ///
+ /// \param[in] whitelist barcode IDs to compare on
+ ///
+ PbiBarcodeFilter(const std::vector<int16_t>& whitelist);
+
+ /// \brief Creates a 'whitelisted' barcode filter (rvalue overload).
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly, in either bc_forward or bc_reverse.
+ ///
+ /// \param[in] whitelist barcode IDs to compare on
+ ///
+ PbiBarcodeFilter(std::vector<int16_t>&& whitelist);
+
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ /// \param[in] idx PBI (raw) index object
+ /// \param[in] row record number in %BAM/PBI files
+ /// \returns true if the record at \p row passes this filter
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+ // PbiFilter held by value.
+ // NOTE(review): presumably Accepts() delegates to this composite of
+ // forward/reverse barcode filters - confirm in the implementation.
+ PbiFilter compositeFilter_;
+};
+
+/// \brief The PbiBarcodeForwardFilter class provides a PbiFilter-compatible
+/// filter on forward barcode ID.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::BarcodeDataFilterBase, parameterized on
+/// BarcodeLookupData::BC_FORWARD.
+///
+/// Example: \include code/PbiBarcodeForwardFilter.txt
+///
+/// \sa BamRecord::BarcodeForward
+///
+struct PbiBarcodeForwardFilter
+ : public internal::BarcodeDataFilterBase<int16_t, BarcodeLookupData::BC_FORWARD>
+{
+public:
+ /// \brief Creates a single-value forward barcode filter.
+ ///
+ /// \param[in] bcFwdId (forward) barcode ID to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiBarcodeForwardFilter(const int16_t bcFwdId,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a 'whitelisted' forward barcode filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly, in bc_forward.
+ ///
+ /// \param[in] whitelist barcode IDs to compare on
+ ///
+ PbiBarcodeForwardFilter(const std::vector<int16_t>& whitelist);
+
+ /// \brief Creates a 'whitelisted' forward barcode filter (rvalue
+ /// overload).
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly, in bc_forward.
+ ///
+ /// \param[in] whitelist barcode IDs to compare on
+ ///
+ PbiBarcodeForwardFilter(std::vector<int16_t>&& whitelist);
+};
+
+/// \brief The PbiBarcodeQualityFilter class provides a PbiFilter-compatible
+/// filter on barcode quality.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::BarcodeDataFilterBase, parameterized on
+/// BarcodeLookupData::BC_QUALITY.
+///
+/// Example: \include code/PbiBarcodeQualityFilter.txt
+///
+/// \sa BamRecord::BarcodeQuality
+///
+struct PbiBarcodeQualityFilter
+ : public internal::BarcodeDataFilterBase<uint8_t, BarcodeLookupData::BC_QUALITY>
+{
+public:
+ /// \brief Creates a single-value barcode quality filter.
+ ///
+ /// \param[in] bcQuality barcode quality to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiBarcodeQualityFilter(const uint8_t bcQuality,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiBarcodeReverseFilter class provides a PbiFilter-compatible
+/// filter on reverse barcode ID.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::BarcodeDataFilterBase, parameterized on
+/// BarcodeLookupData::BC_REVERSE.
+///
+/// Example: \include code/PbiBarcodeReverseFilter.txt
+///
+/// \sa BamRecord::BarcodeReverse
+///
+struct PbiBarcodeReverseFilter
+ : public internal::BarcodeDataFilterBase<int16_t, BarcodeLookupData::BC_REVERSE>
+{
+public:
+ /// \brief Creates a single-value reverse barcode filter.
+ ///
+ /// \param[in] bcRevId (reverse) barcode ID to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiBarcodeReverseFilter(const int16_t bcRevId,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a 'whitelisted' reverse barcode filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly, in bc_reverse.
+ ///
+ /// \param[in] whitelist barcode IDs to compare on
+ ///
+ PbiBarcodeReverseFilter(const std::vector<int16_t>& whitelist);
+
+ /// \brief Creates a 'whitelisted' reverse barcode filter (rvalue
+ /// overload).
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly, in bc_reverse.
+ ///
+ /// \param[in] whitelist barcode IDs to compare on
+ ///
+ PbiBarcodeReverseFilter(std::vector<int16_t>&& whitelist);
+};
+
+/// \brief The PbiBarcodesFilter class provides a PbiFilter-compatible filter on
+/// both forward & reverse barcode IDs.
+///
+/// A record must match both IDs to pass the filter.
+///
+/// Example: \include code/PbiBarcodesFilter.txt
+///
+/// \sa BamRecord::Barcodes
+///
+struct PbiBarcodesFilter
+{
+public:
+ /// \brief Creates a barcodes filter from a std::pair of IDs.
+ ///
+ /// pair.first -> BarcodeForward\n
+ /// pair.second -> BarcodeReverse
+ ///
+ /// \param[in] barcodes barcode IDs to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiBarcodesFilter(const std::pair<int16_t, int16_t> barcodes,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a barcodes filter from forward & reverse IDs.
+ ///
+ /// \param[in] bcForward forward barcode ID to compare on
+ /// \param[in] bcReverse reverse barcode ID to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiBarcodesFilter(const int16_t bcForward,
+ const int16_t bcReverse,
+ const Compare::Type cmp = Compare::EQUAL);
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ /// \param[in] idx PBI (raw) index object
+ /// \param[in] row record number in %BAM/PBI files
+ /// \returns true if the record at \p row passes this filter
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+ // PbiFilter held by value.
+ // NOTE(review): presumably Accepts() delegates to this composite of
+ // forward + reverse barcode filters - confirm in the implementation.
+ PbiFilter compositeFilter_;
+};
+
+/// \brief The PbiIdentityFilter class provides a PbiFilter-compatible filter on
+/// read identity (% aligned match).
+///
+/// Read identity is equivalent to: 1.0 - (nMM + nDel + nIns)/readLength.
+///
+/// Example: \include code/PbiIdentityFilter.txt
+///
+struct PbiIdentityFilter : public internal::FilterBase<float>
+{
+public:
+ /// \brief Creates a read identity filter.
+ ///
+ /// \param[in] identity value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiIdentityFilter(const float identity,
+ const Compare::Type cmp = Compare::EQUAL);
+
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ /// \param[in] idx PBI (raw) index object
+ /// \param[in] row record number in %BAM/PBI files
+ /// \returns true if the record at \p row passes this filter
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+/// \brief The PbiLocalContextFilter class provides a PbiFilter-compatible
+/// filter on local context (adapter, barcode, etc.).
+///
+/// The primary Compare::Type operators intended for this filter are:
+/// Compare::EQUAL, Compare::NOT_EQUAL, Compare::CONTAINS, and
+/// Compare::NOT_CONTAINS.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::BasicDataFilterBase, parameterized on
+/// BasicLookupData::CONTEXT_FLAG.
+///
+/// Example: \include code/PbiLocalContextFilter.txt
+///
+struct PbiLocalContextFilter
+ : public internal::BasicDataFilterBase<LocalContextFlags,
+ BasicLookupData::CONTEXT_FLAG >
+{
+public:
+ /// \brief Creates a filter on local context flags.
+ ///
+ /// \param[in] flags local context flags to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiLocalContextFilter(const LocalContextFlags& flags,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiMapQualityFilter class provides a PbiFilter-compatible filter
+/// on mapping quality.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::MappedDataFilterBase, parameterized on
+/// MappedLookupData::MAP_QUALITY.
+///
+/// Example: \include code/PbiMapQualityFilter.txt
+///
+/// \sa BamRecord::MapQuality
+///
+struct PbiMapQualityFilter
+ : public internal::MappedDataFilterBase<uint8_t, MappedLookupData::MAP_QUALITY>
+{
+public:
+ /// \brief Creates a map quality filter.
+ ///
+ /// \param[in] mapQual value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiMapQualityFilter(const uint8_t mapQual,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiMovieNameFilter class provides a PbiFilter-compatible filter
+/// on movie name.
+///
+/// Example: \include code/PbiMovieNameFilter.txt
+///
+/// \sa BamRecord::MovieName
+///
+struct PbiMovieNameFilter
+{
+public:
+ /// \brief Creates a single-value movie name filter.
+ ///
+ /// \param[in] movieName movie name to compare on
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match movie name, exactly.
+ ///
+ PbiMovieNameFilter(const std::string& movieName);
+
+ /// \brief Creates a 'whitelisted' movie name filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist movie names to compare on
+ ///
+ PbiMovieNameFilter(const std::vector<std::string>& whitelist);
+
+ /// \brief Creates a 'whitelisted' movie name filter (rvalue overload).
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist movie names to compare on
+ ///
+ PbiMovieNameFilter(std::vector<std::string>&& whitelist);
+
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ /// \param[in] idx PBI (raw) index object
+ /// \param[in] row record number in %BAM/PBI files
+ /// \returns true if the record at \p row passes this filter
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+ // PbiFilter held by value.
+ // NOTE(review): presumably Accepts() delegates to this filter, built from
+ // the movie name(s) - confirm in the implementation.
+ PbiFilter compositeFilter_;
+};
+
+/// \brief The PbiNumDeletedBasesFilter class provides a PbiFilter-compatible
+/// filter on the number of deleted bases.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::MappedDataFilterBase, parameterized on MappedLookupData::N_DEL.
+///
+/// Example: \include code/PbiNumDeletedBasesFilter.txt
+///
+/// \sa BamRecord::NumDeletedBases
+///
+struct PbiNumDeletedBasesFilter
+ : public internal::MappedDataFilterBase<size_t, MappedLookupData::N_DEL>
+{
+public:
+ /// \brief Creates a filter on the number of deleted bases.
+ ///
+ /// \param[in] numDeletions value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiNumDeletedBasesFilter(const size_t numDeletions,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiNumInsertedBasesFilter class provides a PbiFilter-compatible
+/// filter on the number of inserted bases.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::MappedDataFilterBase, parameterized on MappedLookupData::N_INS.
+///
+/// Example: \include code/PbiNumInsertedBasesFilter.txt
+///
+/// \sa BamRecord::NumInsertedBases
+///
+struct PbiNumInsertedBasesFilter
+ : public internal::MappedDataFilterBase<size_t, MappedLookupData::N_INS>
+{
+public:
+ /// \brief Creates a filter on the number of inserted bases.
+ ///
+ /// \param[in] numInsertions value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiNumInsertedBasesFilter(const size_t numInsertions,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiNumMatchesFilter class provides a PbiFilter-compatible filter
+/// on the number of matched bases.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::MappedDataFilterBase, parameterized on MappedLookupData::N_M.
+///
+/// Example: \include code/PbiNumMatchesFilter.txt
+///
+/// \sa BamRecord::NumMatches
+///
+struct PbiNumMatchesFilter
+ : public internal::MappedDataFilterBase<size_t, MappedLookupData::N_M>
+{
+public:
+ /// \brief Creates a filter on the number of matched bases.
+ ///
+ /// \param[in] numMatchedBases value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiNumMatchesFilter(const size_t numMatchedBases,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiNumMismatchesFilter class provides a PbiFilter-compatible
+/// filter on the number of mismatched bases.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::MappedDataFilterBase, parameterized on MappedLookupData::N_MM.
+///
+/// Example: \include code/PbiNumMismatchesFilter.txt
+///
+/// \sa BamRecord::NumMismatches
+///
+struct PbiNumMismatchesFilter
+ : public internal::MappedDataFilterBase<size_t, MappedLookupData::N_MM>
+{
+public:
+ /// \brief Creates a filter on the number of mismatched bases.
+ ///
+ /// \param[in] numMismatchedBases value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiNumMismatchesFilter(const size_t numMismatchedBases,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiQueryEndFilter class provides a PbiFilter-compatible filter
+/// on query end.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::BasicDataFilterBase, parameterized on BasicLookupData::Q_END.
+///
+/// Example: \include code/PbiQueryEndFilter.txt
+///
+/// \sa BamRecord::QueryEnd
+///
+struct PbiQueryEndFilter
+ : public internal::BasicDataFilterBase<int32_t, BasicLookupData::Q_END>
+{
+public:
+ /// \brief Creates a filter on query end position.
+ ///
+ /// \param[in] position value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiQueryEndFilter(const int32_t position,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiQueryLengthFilter class provides a PbiFilter-compatible filter
+/// on query length.
+///
+/// queryLength = (queryEnd - queryStart)
+///
+/// Example: \include code/PbiQueryLengthFilter.txt
+///
+/// \sa BamRecord::QueryEnd, BamRecord::QueryStart
+///
+struct PbiQueryLengthFilter : public internal::FilterBase<int32_t>
+{
+public:
+ /// \brief Creates a filter on query length.
+ ///
+ /// \param[in] length value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiQueryLengthFilter(const int32_t length,
+ const Compare::Type cmp = Compare::EQUAL);
+
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ /// \param[in] idx PBI (raw) index object
+ /// \param[in] row record number in %BAM/PBI files
+ /// \returns true if the record at \p row passes this filter
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+};
+
+/// \brief The PbiQueryNameFilter class provides a PbiFilter-compatible filter
+/// on query name.
+///
+/// Example: \include code/PbiQueryNameFilter.txt
+///
+/// \sa BamRecord::FullName
+///
+struct PbiQueryNameFilter
+{
+public:
+ /// \brief Creates a single-value query name filter.
+ ///
+ /// \param[in] qname query name to compare on
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match query name, exactly.
+ ///
+ PbiQueryNameFilter(const std::string& qname);
+
+ /// \brief Creates a 'whitelisted' query name filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist query names to compare on
+ ///
+ PbiQueryNameFilter(const std::vector<std::string>& whitelist);
+
+ /// \brief Copy constructor.
+ ///
+ /// User-declared: the d_ member is a std::unique_ptr, which is not
+ /// copyable on its own.
+ ///
+ /// \param[in] other filter to copy from
+ ///
+ // NOTE(review): no copy-assignment or move operations are declared
+ // alongside this copy constructor and destructor.
+ PbiQueryNameFilter(const PbiQueryNameFilter& other);
+
+ /// \brief Destroys the filter.
+ ///
+ /// Declared here without an inline definition; the private implementation
+ /// type held by d_ is only forward-declared in this header.
+ ///
+ ~PbiQueryNameFilter(void);
+
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ /// \param[in] idx PBI (raw) index object
+ /// \param[in] row record number in %BAM/PBI files
+ /// \returns true if the record at \p row passes this filter
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+ // Forward-declared implementation type; its definition is not part of this
+ // header.
+ struct PbiQueryNameFilterPrivate;
+ std::unique_ptr<PbiQueryNameFilterPrivate> d_;
+};
+
+/// \brief The PbiQueryStartFilter class provides a PbiFilter-compatible filter
+/// on query start.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::BasicDataFilterBase, parameterized on BasicLookupData::Q_START.
+///
+/// Example: \include code/PbiQueryStartFilter.txt
+///
+/// \sa BamRecord::QueryStart
+///
+struct PbiQueryStartFilter
+ : public internal::BasicDataFilterBase<int32_t, BasicLookupData::Q_START>
+{
+public:
+ /// \brief Creates a filter on query start position.
+ ///
+ /// \param[in] position value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiQueryStartFilter(const int32_t position,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiReadAccuracyFilter class provides a PbiFilter-compatible
+/// filter on read accuracy.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::BasicDataFilterBase, parameterized on
+/// BasicLookupData::READ_QUALITY.
+///
+/// Example: \include code/PbiReadAccuracyFilter.txt
+///
+/// \sa BamRecord::ReadAccuracy
+///
+struct PbiReadAccuracyFilter
+ : public internal::BasicDataFilterBase<Accuracy, BasicLookupData::READ_QUALITY>
+{
+public:
+ /// \brief Creates a filter on read accuracy.
+ ///
+ /// \param[in] accuracy value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiReadAccuracyFilter(const Accuracy accuracy,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiReadGroupFilter class provides a PbiFilter-compatible filter
+/// on read group.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::BasicDataFilterBase, parameterized on BasicLookupData::RG_ID
+/// with an int32_t value type. Read groups may be supplied as numeric IDs,
+/// printable ID strings, or ReadGroupInfo objects.
+///
+/// Example: \include code/PbiReadGroupFilter.txt
+///
+/// \sa BamRecord::ReadGroup,
+/// BamRecord::ReadGroupId,
+/// BamRecord::ReadGroupNumericId
+///
+struct PbiReadGroupFilter
+ : public internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>
+{
+public:
+ /// \brief Creates a filter on read group (numeric) ID value
+ ///
+ /// \param[in] rgId numeric read group ID
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ /// \sa BamRecord::ReadGroupNumericId
+ ///
+ PbiReadGroupFilter(const int32_t rgId,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a filter on printable read group ID value
+ ///
+ /// \param[in] rgId read group ID string
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ /// \sa BamRecord::ReadGroupId
+ ///
+ // NOTE(review): rgId is taken by const value, unlike the other string
+ // parameters in this header, which use const references. Switching to a
+ // reference would have to be done together with the definition.
+ PbiReadGroupFilter(const std::string rgId,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a filter on read group (object).
+ ///
+ /// \param[in] rg read group object
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ /// \sa BamRecord::ReadGroup
+ ///
+ PbiReadGroupFilter(const ReadGroupInfo& rg,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a 'whitelisted' filter on read group numeric IDs.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist read group IDs to compare on
+ ///
+ PbiReadGroupFilter(const std::vector<int32_t>& whitelist);
+
+ /// \brief Creates a 'whitelisted' filter on read group numeric IDs
+ /// (rvalue overload).
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist read group IDs to compare on
+ ///
+ PbiReadGroupFilter(std::vector<int32_t>&& whitelist);
+
+ /// \brief Creates a 'whitelisted' filter on read group printable IDs.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist read group ID strings to compare on
+ ///
+ PbiReadGroupFilter(const std::vector<std::string>& whitelist);
+
+ /// \brief Creates a 'whitelisted' filter on read group printable IDs
+ /// (rvalue overload).
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist read group ID strings to compare on
+ ///
+ PbiReadGroupFilter(std::vector<std::string>&& whitelist);
+
+ /// \brief Creates a 'whitelisted' filter using read group objects.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist read group objects to compare on
+ ///
+ PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist);
+
+ /// \brief Creates a 'whitelisted' filter using read group objects
+ /// (rvalue overload).
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist read group objects to compare on
+ ///
+ PbiReadGroupFilter(std::vector<ReadGroupInfo>&& whitelist);
+};
+
+/// \brief The PbiReferenceEndFilter class provides a PbiFilter-compatible
+/// filter on reference end.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::MappedDataFilterBase, parameterized on MappedLookupData::T_END.
+///
+/// Example: \include code/PbiReferenceEndFilter.txt
+///
+/// \sa BamRecord::ReferenceEnd
+///
+struct PbiReferenceEndFilter
+ : public internal::MappedDataFilterBase<uint32_t, MappedLookupData::T_END>
+{
+public:
+ /// \brief Creates a filter on reference end.
+ ///
+ /// \param[in] tEnd value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiReferenceEndFilter(const uint32_t tEnd,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiReferenceIdFilter class provides a PbiFilter-compatible
+/// filter on reference ID.
+///
+/// The index lookup (Accepts) is inherited from
+/// internal::MappedDataFilterBase, parameterized on MappedLookupData::T_ID.
+///
+/// Example: \include code/PbiReferenceIdFilter.txt
+///
+/// \sa BamRecord::ReferenceId
+///
+struct PbiReferenceIdFilter
+ : public internal::MappedDataFilterBase<int32_t, MappedLookupData::T_ID>
+{
+public:
+ /// \brief Creates a single-value reference ID filter.
+ ///
+ /// \param[in] tId reference ID to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiReferenceIdFilter(const int32_t tId,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a 'whitelisted' reference ID filter.
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist reference IDs to compare on
+ ///
+ PbiReferenceIdFilter(const std::vector<int32_t>& whitelist);
+
+ /// \brief Creates a 'whitelisted' reference ID filter (rvalue overload).
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist reference IDs to compare on
+ ///
+ PbiReferenceIdFilter(std::vector<int32_t>&& whitelist);
+};
+
+/// \brief The PbiReferenceNameFilter class provides a PbiFilter-compatible
+/// filter on reference name.
+///
+/// Example: \include code/PbiReferenceNameFilter.txt
+///
+/// \sa BamRecord::ReferenceName
+///
+struct PbiReferenceNameFilter
+{
+public:
+ /// \brief Creates a single-value reference name filter.
+ ///
+ /// \param[in] rname reference name to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiReferenceNameFilter(const std::string& rname,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a 'whitelisted' reference name filter.
+ ///
+ /// \note There is no compare type parameter here; it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist reference names to compare on
+ ///
+ PbiReferenceNameFilter(const std::vector<std::string>& whitelist);
+
+ /// \brief Creates a 'whitelisted' reference name filter (rvalue overload).
+ ///
+ /// \note There is no compare type parameter here; it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist reference names to compare on
+ ///
+ PbiReferenceNameFilter(std::vector<std::string>&& whitelist);
+
+public:
+ /// \brief Performs the actual index lookup.
+ ///
+ /// Most client code should not need to use this method directly.
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const;
+
+private:
+ mutable bool initialized_;
+ mutable PbiFilter subFilter_;
+ std::string rname_;
+ boost::optional<std::vector<std::string> > rnameWhitelist_;
+ Compare::Type cmp_;
+
+private:
+ // Marked const so that filter setup can be delayed until Accepts(), once we
+ // have access to PBI/BAM input; the members it modifies are marked mutable.
+ void Initialize(const PbiRawData& idx) const;
+};
+
+/// \brief The PbiReferenceStartFilter class provides a PbiFilter-compatible
+/// filter on reference start (the MappedLookupData::T_START field).
+///
+/// Example: \include code/PbiReferenceStartFilter.txt
+///
+/// \sa BamRecord::ReferenceStart
+///
+struct PbiReferenceStartFilter
+ : public internal::MappedDataFilterBase<uint32_t, MappedLookupData::T_START>
+{
+public:
+ /// \brief Creates a filter on reference start.
+ ///
+ /// \param[in] tStart reference start value to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiReferenceStartFilter(const uint32_t tStart,
+ const Compare::Type cmp = Compare::EQUAL);
+};
+
+/// \brief The PbiZmwFilter class provides a PbiFilter-compatible filter on
+/// ZMW hole number (the BasicLookupData::ZMW field).
+///
+/// Example: \include code/PbiZmwFilter.txt
+///
+/// \sa BamRecord::HoleNumber
+///
+struct PbiZmwFilter : public internal::BasicDataFilterBase<int32_t,
+ BasicLookupData::ZMW>
+{
+public:
+ /// \brief Creates a single-value ZMW hole number filter.
+ ///
+ /// \param[in] zmw ZMW hole number to compare on
+ /// \param[in] cmp compare type (defaults to Compare::EQUAL)
+ ///
+ PbiZmwFilter(const int32_t zmw,
+ const Compare::Type cmp = Compare::EQUAL);
+
+ /// \brief Creates a 'whitelisted' ZMW hole number filter.
+ ///
+ /// \note There is no compare type parameter here; it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist ZMW hole numbers to compare on
+ ///
+ PbiZmwFilter(const std::vector<int32_t>& whitelist);
+
+ /// \brief Creates a 'whitelisted' ZMW hole number filter (rvalue overload).
+ ///
+ /// \note There is no compare type parameter here; it is always
+ /// Compare::EQUAL. Records will match at least one value from the
+ /// whitelist, exactly.
+ ///
+ /// \param[in] whitelist ZMW hole numbers to compare on
+ ///
+ PbiZmwFilter(std::vector<int32_t>&& whitelist);
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/PbiFilterTypes.inl"
+
+#endif // PBIFILTERTYPES_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiIndex.h
+/// \brief Defines the PbiIndex class.
+//
+// Author: Derek Barnett
+
+#ifndef PBIINDEX_H
+#define PBIINDEX_H
+
+#include "pbbam/Config.h"
+#include "pbbam/PbiFile.h"
+#include "pbbam/PbiLookupData.h"
+#include <memory>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { class PbiIndexPrivate; }
+
+/// \brief The PbiIndex class provides a representation of PBI index data that
+/// is rearranged for quick lookups.
+///
+/// The PbiIndex class itself provides access to a few high-level attributes
+/// (e.g. version, number of records, etc.). The actual lookup data is stored
+/// in its member components:
+/// BasicLookupData,
+/// MappedLookupData,
+/// ReferenceLookupData, &
+/// BarcodeLookupData.
+///
+class PBBAM_EXPORT PbiIndex
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates a PbiIndex lookup structure from a PBI file.
+ ///
+ /// \param[in] pbiFilename PBI filename to load
+ ///
+ /// \throws std::runtime_error if failed to load data from file
+ ///
+ PbiIndex(const std::string& pbiFilename);
+
+ PbiIndex(const PbiIndex& other);
+ PbiIndex(PbiIndex&& other);
+ PbiIndex& operator=(const PbiIndex& other);
+ PbiIndex& operator=(PbiIndex&& other);
+ ~PbiIndex(void);
+
+ /// \}
+
+public:
+ /// \name PBI General Attributes
+ /// \{
+
+ /// \returns true if index has BarcodeData section
+ bool HasBarcodeData(void) const;
+
+ /// \returns true if index has MappedData section
+ bool HasMappedData(void) const;
+
+ /// \returns true if index has ReferenceData section
+ bool HasReferenceData(void) const;
+
+ /// \returns true if index has \p section
+ /// \param[in] section PbiFile::Section identifier
+ ///
+ bool HasSection(const PbiFile::Section section) const;
+
+ /// \returns index filename ("*.pbi")
+ ///
+ /// \note Returns an empty string if the underlying data was generated, not
+ /// loaded from file.
+ ///
+ std::string Filename(void) const;
+
+ /// \returns enum flags representing the file sections present
+ PbiFile::Sections FileSections(void) const;
+
+ /// \returns the number of records in the PBI (& associated %BAM)
+ uint32_t NumReads(void) const;
+
+ /// \returns the PBI file's version
+ PbiFile::VersionEnum Version(void) const;
+
+ /// \}
+
+public:
+ /// \name Lookup Data Components
+ /// \{
+
+ /// \returns const reference to BarcodeData lookup structure
+ ///
+ /// May be empty, check result of HasBarcodeData.
+ ///
+ const BarcodeLookupData& BarcodeData(void) const;
+
+ /// \returns const reference to BasicData lookup structure
+ const BasicLookupData& BasicData(void) const;
+
+ /// \returns const reference to MappedData lookup structure
+ ///
+ /// May be empty, check result of HasMappedData.
+ ///
+ const MappedLookupData& MappedData(void) const;
+
+ /// \returns const reference to reference data lookup structure
+ ///
+ /// May be empty, check result of HasReferenceData.
+ ///
+ const ReferenceLookupData& ReferenceData(void) const;
+
+ /// \}
+
+private:
+ PbiIndex(void);
+ std::unique_ptr<internal::PbiIndexPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "internal/PbiIndex.inl"
+
+#endif // PBIINDEX_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiIndexedBamReader.h
+/// \brief Defines the PbiIndexedBamReader class.
+//
+// Author: Derek Barnett
+
+#ifndef PBIINDEXEDBAMREADER_H
+#define PBIINDEXEDBAMREADER_H
+
+#include "pbbam/BamFile.h"
+#include "pbbam/BamReader.h"
+#include "pbbam/PbiBasicTypes.h"
+#include "pbbam/PbiFilter.h"
+#include "pbbam/PbiIndex.h"
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { struct PbiIndexedBamReaderPrivate; }
+
+/// \brief The PbiIndexedBamReader class provides read-only iteration over %BAM
+/// records, limited to some filtering criteria.
+///
+/// The PacBio BAM index (*.pbi) is used to allow random-access operations.
+///
+class PBBAM_EXPORT PbiIndexedBamReader : public BamReader
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Constructs %BAM reader, with an initial filter.
+ ///
+ /// All reads that satisfy the filter will be available.
+ ///
+ /// \param[in] filter PbiFilter or compatible object
+ /// \param[in] bamFilename input %BAM filename
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+ /// read
+ ///
+ PbiIndexedBamReader(const PbiFilter& filter, const std::string& bamFilename);
+
+ /// \brief Constructs %BAM reader, with an initial filter.
+ ///
+ /// All reads that satisfy the filter will be available.
+ ///
+ /// \param[in] filter PbiFilter or compatible object
+ /// \param[in] bamFile input BamFile object
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+ /// read
+ ///
+ PbiIndexedBamReader(const PbiFilter& filter, const BamFile& bamFile);
+
+ /// \brief Constructs %BAM reader, with an initial filter.
+ ///
+ /// All reads that satisfy the filter will be available.
+ ///
+ /// \param[in] filter PbiFilter or compatible object
+ /// \param[in] bamFile input BamFile object (rvalue overload)
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+ /// read
+ ///
+ PbiIndexedBamReader(const PbiFilter& filter, BamFile&& bamFile);
+
+ /// \brief Constructs %BAM reader, with no initial filter.
+ ///
+ /// Useful for delaying either specifying the filtering criteria or
+ /// performing the PBI lookups.
+ ///
+ /// \param[in] bamFilename input %BAM filename
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+ /// read
+ ///
+ PbiIndexedBamReader(const std::string& bamFilename);
+
+ /// \brief Constructs %BAM reader, with no initial filter.
+ ///
+ /// Useful for delaying either specifying the filtering criteria or
+ /// performing the PBI lookups.
+ ///
+ /// \param[in] bamFile input BamFile object
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+ /// read
+ ///
+ PbiIndexedBamReader(const BamFile& bamFile);
+
+ /// \brief Constructs %BAM reader, with no initial filter.
+ ///
+ /// Useful for delaying either specifying the filtering criteria or
+ /// performing the PBI lookups.
+ ///
+ /// \param[in] bamFile input BamFile object (rvalue overload)
+ ///
+ /// \throws std::runtime_error if either file (*.bam or *.pbi) cannot be
+ /// read
+ ///
+ PbiIndexedBamReader(BamFile&& bamFile);
+
+ ~PbiIndexedBamReader(void);
+
+ /// \}
+
+public:
+ /// \name Filtering & Index Data
+ /// \{
+
+ /// \returns the current filter active on this reader
+ const PbiFilter& Filter(void) const;
+
+// /// \returns the reader's underlying index data
+// const PbiIndex& Index(void) const;
+
+public:
+ /// \brief Sets a new filter on the reader.
+ ///
+ /// \param[in] filter PbiFilter or compatible object to apply
+ /// \returns reference to this reader
+ ///
+ PbiIndexedBamReader& Filter(const PbiFilter& filter);
+
+ /// \}
+
+protected:
+ int ReadRawData(BGZF* bgzf, bam1_t* b);
+
+private:
+ std::unique_ptr<internal::PbiIndexedBamReaderPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // PBIINDEXEDBAMREADER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiLookupData.h
+/// \brief Defines the classes used for PBI data lookup.
+//
+// Author: Derek Barnett
+
+#ifndef PBILOOKUPDATA_H
+#define PBILOOKUPDATA_H
+
+#include "pbbam/Config.h"
+#include "pbbam/Compare.h"
+#include "pbbam/PbiBasicTypes.h"
+#include <deque>
+#include <map>
+#include <unordered_map>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+class PbiRawBarcodeData;
+class PbiRawBasicData;
+class PbiRawMappedData;
+class PbiRawReferenceData;
+
+/// \brief The OrderedLookup class provides a quick lookup structure for
+/// PBI index data, where key values are sorted.
+///
+/// The main, underlying lookup structure is essentially a std::map, where the
+/// key is some value (e.g. readAccuracy) and the value is the list of indices
+/// (i-th record) in the %BAM file.
+///
+/// This lookup class is one of the main building blocks for the PBI index
+/// lookup components.
+///
+/// \tparam T type of key stored (Accuracy for readAccuracy, int32_t for ZMW,
+/// etc.)
+///
+template<typename T>
+class OrderedLookup
+{
+public:
+ typedef T key_type;
+ typedef IndexList value_type;
+ typedef std::map<key_type, value_type> container_type;
+ typedef typename container_type::iterator iterator;
+ typedef typename container_type::const_iterator const_iterator;
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty OrderedLookup structure.
+ ///
+ OrderedLookup(void);
+
+ /// \brief Creates an OrderedLookup structure, from another's underlying
+ /// lookup container.
+ ///
+ /// \param[in] data lookup data container
+ ///
+ OrderedLookup(const container_type& data);
+
+ /// \brief Creates an OrderedLookup structure, from another's underlying
+ /// lookup container (rvalue overload).
+ ///
+ /// \param[in] data lookup data container
+ ///
+ OrderedLookup(container_type&& data);
+
+ /// \brief Creates an OrderedLookup structure, from raw data.
+ ///
+ /// \param[in] rawData raw data values, where i is the index into the %BAM
+ /// file, and rawData[i] is the key value
+ ///
+ OrderedLookup(const std::vector<T>& rawData);
+
+ /// \brief Creates an OrderedLookup structure, from raw data (rvalue overload).
+ ///
+ /// \param[in] rawData raw data values, where i is the index into the %BAM
+ /// file, and rawData[i] is the key value
+ ///
+ OrderedLookup(std::vector<T>&& rawData);
+
+ /// \}
+
+public:
+ /// \name Operators
+ /// \{
+
+ /// \returns true if this lookup is the same as \p other
+ bool operator==(const OrderedLookup<T>& other) const;
+
+ /// \returns true if this lookup is not the same as \p other
+ bool operator!=(const OrderedLookup<T>& other) const;
+
+ /// \}
+
+public:
+ /// \name STL-Compatibility Methods
+ /// \{
+
+ /// \returns an iterator to the first element in the underlying container
+ iterator begin(void);
+
+ /// \returns a const iterator to the first element in the underlying
+ /// container
+ const_iterator begin(void) const;
+
+ /// \returns a const iterator to the first element in the underlying
+ /// container
+ const_iterator cbegin(void) const;
+
+ /// \returns an iterator after the last element in the underlying container
+ iterator end(void);
+
+ /// \returns a const iterator after the last element in the underlying
+ /// container
+ const_iterator end(void) const;
+
+ /// \returns a const iterator after the last element in the underlying
+ /// container
+ const_iterator cend(void) const;
+
+ /// \returns true if underlying container is empty
+ bool empty(void) const;
+
+ /// \returns number of keys in the container
+ size_t size(void) const;
+
+ /// \}
+
+public:
+ /// \name Lookup Data
+ /// \{
+
+ /// \brief Performs a lookup into the underlying data.
+ ///
+ /// \param[in] key key value to lookup
+ /// \param[in] compare compare type
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup key &
+ /// compare type
+ ///
+ IndexList LookupIndices(const key_type& key,
+ const Compare::Type& compare) const;
+
+ /// \brief Converts the lookup structure back into its raw data.
+ ///
+ /// \returns raw data values, where i is the index into the %BAM file, and
+ /// rawData[i] is the key value
+ ///
+ std::vector<T> Unpack(void) const;
+
+ /// \}
+
+private:
+ IndexList LookupInclusiveRange(const const_iterator& begin,
+ const const_iterator& end) const;
+
+ IndexList LookupExclusiveRange(const const_iterator& begin,
+ const const_iterator& end,
+ const key_type& key) const;
+
+private:
+ container_type data_;
+};
+
+/// \brief The UnorderedLookup class provides a quick lookup structure for
+/// PBI index data, where key values are not sorted.
+///
+/// The main, underlying lookup structure is essentially a std::unordered_map,
+/// where the key is some value (e.g. read group ID) and the value is the list
+/// of indices (i-th record) in the %BAM file.
+///
+/// This lookup class is one of the main building blocks for the PBI index
+/// lookup components.
+///
+/// \tparam T type of key stored (Accuracy for readAccuracy, int32_t for ZMW,
+/// etc.)
+///
+template<typename T>
+class UnorderedLookup
+{
+public:
+ typedef T key_type;
+ typedef IndexList value_type;
+ typedef std::unordered_map<key_type, value_type> container_type;
+ typedef typename container_type::iterator iterator;
+ typedef typename container_type::const_iterator const_iterator;
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty UnorderedLookup structure.
+ ///
+ UnorderedLookup(void);
+
+ /// \brief Creates an UnorderedLookup structure, from another's underlying
+ /// lookup container.
+ ///
+ /// \param[in] data lookup data container
+ ///
+ UnorderedLookup(const container_type& data);
+
+ /// \brief Creates an UnorderedLookup structure, from another's underlying
+ /// lookup container (rvalue overload).
+ ///
+ /// \param[in] data lookup data container
+ ///
+ UnorderedLookup(container_type&& data);
+
+ /// \brief Creates an UnorderedLookup structure, from raw data.
+ ///
+ /// \param[in] rawData raw data values, where i is the index into the %BAM
+ /// file, and rawData[i] is the key value
+ ///
+ UnorderedLookup(const std::vector<T>& rawData);
+
+ /// \brief Creates an UnorderedLookup structure, from raw data (rvalue overload).
+ ///
+ /// \param[in] rawData raw data values, where i is the index into the %BAM
+ /// file, and rawData[i] is the key value
+ ///
+ UnorderedLookup(std::vector<T>&& rawData);
+
+ /// \}
+
+public:
+ /// \name Operators
+ /// \{
+
+ /// \returns true if this lookup is the same as \p other
+ bool operator==(const UnorderedLookup<T>& other) const;
+
+ /// \returns true if this lookup is not the same as \p other
+ bool operator!=(const UnorderedLookup<T>& other) const;
+
+ /// \}
+
+public:
+ /// \name STL-Compatibility Methods
+ /// \{
+
+ /// \returns an iterator to the first element in the underlying container
+ iterator begin(void);
+
+ /// \returns a const iterator to the first element in the underlying
+ /// container
+ const_iterator begin(void) const;
+
+ /// \returns a const iterator to the first element in the underlying
+ /// container
+ const_iterator cbegin(void) const;
+
+ /// \returns an iterator after the last element in the underlying container
+ iterator end(void);
+
+ /// \returns a const iterator after the last element in the underlying
+ /// container
+ const_iterator end(void) const;
+
+ /// \returns a const iterator after the last element in the underlying
+ /// container
+ const_iterator cend(void) const;
+
+ /// \returns true if underlying container is empty
+ bool empty(void) const;
+
+ /// \returns number of keys in the container
+ size_t size(void) const;
+
+ /// \}
+
+public:
+ /// \name Lookup Data
+ /// \{
+
+ /// \brief Performs a lookup into the underlying data.
+ ///
+ /// \param[in] key key value to lookup
+ /// \param[in] compare compare type
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup key &
+ /// compare type
+ ///
+ IndexList LookupIndices(const key_type& key,
+ const Compare::Type& compare) const;
+
+ /// \brief Converts the lookup structure back into its raw data.
+ ///
+ /// \returns raw data values, where i is the index into the %BAM file, and
+ /// rawData[i] is the key value
+ ///
+ std::vector<T> Unpack(void) const;
+
+ /// \}
+
+private:
+ template<typename Compare>
+ IndexList LookupHelper(const key_type& key,
+ const Compare& cmp) const;
+
+private:
+ container_type data_;
+};
+
+/// \brief The BasicLookupData class provides quick lookup access to the
+/// "BasicData" section of the PBI index.
+///
+class PBBAM_EXPORT BasicLookupData
+{
+public:
+ /// \brief This enum describes the component fields of the BasicData
+ /// section.
+ enum Field
+ {
+ RG_ID
+ , Q_START
+ , Q_END
+ , ZMW
+ , READ_QUALITY
+ , CONTEXT_FLAG
+ , VIRTUAL_OFFSET
+ };
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty lookup data object.
+ BasicLookupData(void);
+
+ /// \brief Creates a lookup data object from the corresponding raw data.
+ ///
+ /// \param[in] rawData raw data loaded from a PBI file
+ ///
+ BasicLookupData(const PbiRawBasicData& rawData);
+
+ /// \}
+
+public:
+ /// \name Lookup Data Methods
+ /// \{
+
+ /// \brief Adds \b virtual file offset data to the index lookup result
+ /// blocks.
+ ///
+ /// A PBI lookup will result in a number of index lists, depending on the
+ /// complexity of the PbiFilter involved. These index lists are then merged
+ /// down into blocks of contiguous values, where each block describes a
+ /// particular record index and the number of subsequent, contiguous reads
+ /// that immediately follow it. In this manner, we need only perform seeks
+ /// to the first record of each block.
+ ///
+ /// This method takes such blocks and annotates them with the corresponding
+ /// \b virtual file offset. Subsequent %BAM readers can use this information
+ /// to control file seeks.
+ ///
+ /// \param[in,out] blocks index result blocks to annotate with file offsets
+ ///
+ /// \throws std::out_of_range if a block has an invalid index value
+ ///
+ void ApplyOffsets(IndexResultBlocks& blocks) const;
+
+ /// \brief This method dispatches a single-value lookup query to the proper
+ /// data member.
+ ///
+ /// Client code, such as custom filters, should use this when possible, only
+ /// touching the raw fields for more complex operations (e.g. when unpacking
+ /// is necessary).
+ ///
+ /// \param[in] field section field to lookup
+ /// \param[in] value value to lookup
+ /// \param[in] compareType compare type (defaults to Compare::EQUAL)
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup
+ ///
+ template<typename T>
+ IndexList Indices(const BasicLookupData::Field& field,
+ const T& value,
+ const Compare::Type& compareType = Compare::EQUAL) const;
+
+ /// \brief This method dispatches a multi-value lookup query to the proper
+ /// data member.
+ ///
+ /// Client code, such as custom filters, should use this when possible, only
+ /// touching the raw fields for more complex operations (e.g. when unpacking
+ /// is necessary).
+ ///
+ /// \note There is no compare type parameter here; it is always
+ /// Compare::EQUAL. Results will correspond to an exact match on at
+ /// least one value in the list.
+ ///
+ /// \param[in] field section field to lookup
+ /// \param[in] values values to lookup
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup
+ ///
+ template<typename T>
+ IndexList IndicesMulti(const BasicLookupData::Field& field,
+ const std::vector<T>& values) const;
+
+ /// \returns the \b virtual file offsets for all records
+ ///
+ const std::vector<int64_t>& VirtualFileOffsets(void) const;
+
+ /// \}
+
+public:
+ /// \name Lookup Data Members
+ /// \{
+
+ // map ordering doesn't make sense, optimize for direct lookup
+ UnorderedLookup<int32_t> rgId_;
+
+ // numeric comparisons make sense, keep key ordering preserved
+ OrderedLookup<int32_t> qStart_;
+ OrderedLookup<int32_t> qEnd_;
+ OrderedLookup<int32_t> holeNumber_;
+ OrderedLookup<float> readQual_;
+
+ // TODO: see if this works, or if we can use an unordered, 'direct' query
+ OrderedLookup<uint8_t> ctxtFlag_;
+
+ // virtual file offsets, one per record
+ std::vector<int64_t> fileOffset_;
+
+ /// \}
+};
+
+/// \brief The MappedLookupData class provides quick lookup access to the
+/// "MappedData" section of the PBI index.
+///
+class PBBAM_EXPORT MappedLookupData
+{
+public:
+ /// \brief This enum describes the component fields of the MappedData
+ /// section.
+ enum Field
+ {
+ T_ID
+ , T_START
+ , T_END
+ , A_START
+ , A_END
+ , N_M
+ , N_MM
+ , N_INS
+ , N_DEL
+ , MAP_QUALITY
+ , STRAND
+ };
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty lookup data object.
+ MappedLookupData(void);
+
+ /// \brief Creates a lookup data object from the corresponding raw data.
+ ///
+ /// \param[in] rawData raw data loaded from a PBI file
+ ///
+ MappedLookupData(const PbiRawMappedData& rawData);
+
+ /// \}
+
+public:
+ /// \name Lookup Data Methods
+ /// \{
+
+ /// \brief This method dispatches a single-value lookup query to the proper
+ /// data member.
+ ///
+ /// Client code, such as custom filters, should use this when possible, only
+ /// touching the raw fields for more complex operations (e.g. when unpacking
+ /// is necessary).
+ ///
+ /// \param[in] field section field to lookup
+ /// \param[in] value value to lookup
+ /// \param[in] compareType compare type (defaults to Compare::EQUAL)
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup
+ ///
+ template<typename T>
+ IndexList Indices(const MappedLookupData::Field& field,
+ const T& value,
+ const Compare::Type& compareType = Compare::EQUAL) const;
+
+ /// \brief This method dispatches a multi-value lookup query to the proper
+ /// data member.
+ ///
+ /// Client code, such as custom filters, should use this when possible, only
+ /// touching the raw fields for more complex operations (e.g. when unpacking
+ /// is necessary).
+ ///
+ /// \note There is no compare type parameter here; it is always
+ /// Compare::EQUAL. Results will correspond to an exact match on at
+ /// least one value in the list.
+ ///
+ /// \param[in] field section field to lookup
+ /// \param[in] values values to lookup
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup
+ ///
+ template<typename T>
+ IndexList IndicesMulti(const MappedLookupData::Field& field,
+ const std::vector<T>& values) const;
+
+ /// \}
+
+public:
+ /// \name Lookup Data Members
+ /// \{
+
+ // numeric comparisons make sense, keep key ordering preserved
+ OrderedLookup<int32_t> tId_;
+ OrderedLookup<uint32_t> tStart_;
+ OrderedLookup<uint32_t> tEnd_;
+ OrderedLookup<uint32_t> aStart_;
+ OrderedLookup<uint32_t> aEnd_;
+ OrderedLookup<uint32_t> nM_;
+ OrderedLookup<uint32_t> nMM_;
+ OrderedLookup<uint8_t> mapQV_;
+
+ // generated values, not stored directly in the PBI file
+ OrderedLookup<uint32_t> nIns_;
+ OrderedLookup<uint32_t> nDel_;
+
+ // no need for map overhead, just store direct indices per strand
+ IndexList reverseStrand_;
+ IndexList forwardStrand_;
+
+ /// \}
+};
+
+/// \brief The ReferenceLookupData class provides quick lookup access to the
+/// "CoordinateSortedData" section of the PBI index.
+/// Each stored reference ID (tId) is associated with one [begin, end) index range.
+class PBBAM_EXPORT ReferenceLookupData
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty lookup data object.
+ ///
+ ReferenceLookupData(void);
+
+ /// \brief Creates a lookup data object from the corresponding raw data.
+ ///
+ /// \param[in] rawData raw data loaded from a PBI file
+ ///
+ ReferenceLookupData(const PbiRawReferenceData& rawData);
+
+ /// \}
+
+public:
+ /// \name Lookup Data Methods
+ /// \{
+
+ /// \brief Retrieves the index range for all records that map to a
+ /// particular reference.
+ ///
+ /// Client code, such as custom filters, should use this when possible, only
+ /// touching the raw fields for more complex operations (e.g. when unpacking
+ /// is necessary).
+ ///
+ /// \param[in] tId reference ID to look up
+ ///
+ /// \returns resulting index range [begin, end). If \p tId is unknown,
+ /// will return IndexRange(-1,-1) .
+ ///
+ IndexRange Indices(const int32_t tId) const;
+
+ /// \}
+
+public:
+ /// \name Lookup Data Members
+ /// \{
+
+ // references_[tId] = [begin, end) indices (hash map keyed by reference ID)
+ std::unordered_map<int32_t, IndexRange> references_;
+
+ /// \}
+};
+
+/// \brief The BarcodeLookupData class provides quick lookup access to the
+/// "BarcodeData" section of the PBI index.
+///
+class PBBAM_EXPORT BarcodeLookupData
+{
+public:
+ /// \brief This enum describes the component fields of the BarcodeData
+ /// section. It selects the data member queried by Indices / IndicesMulti.
+ enum Field
+ {
+ BC_FORWARD ///< forward barcode field (cf. bcForward_)
+ , BC_REVERSE ///< reverse barcode field (cf. bcReverse_)
+ , BC_QUALITY ///< barcode quality field (cf. bcQual_)
+ };
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty lookup data object.
+ ///
+ BarcodeLookupData(void);
+
+ /// \brief Creates a lookup data object from the corresponding raw data.
+ ///
+ /// \param[in] rawData raw data loaded from a PBI file
+ ///
+ BarcodeLookupData(const PbiRawBarcodeData& rawData);
+
+ /// \}
+
+public:
+ /// \name Lookup Data Methods
+ /// \{
+
+ /// \brief This method dispatches a single-value lookup query to the proper
+ /// data member.
+ ///
+ /// Client code, such as custom filters, should use this when possible, only
+ /// touching the raw fields for more complex operations (e.g. when unpacking
+ /// is necessary).
+ ///
+ /// \param[in] field section field to look up
+ /// \param[in] value value to look up
+ /// \param[in] compareType compare type (defaults to Compare::EQUAL)
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup
+ ///
+ template<typename T>
+ IndexList Indices(const BarcodeLookupData::Field& field,
+ const T& value,
+ const Compare::Type& compareType = Compare::EQUAL) const;
+
+ /// \brief This method dispatches a multi-value lookup query to the proper
+ /// data member.
+ ///
+ /// Client code, such as custom filters, should use this when possible, only
+ /// touching the raw fields for more complex operations (e.g. when unpacking
+ /// is necessary).
+ ///
+ /// \note There is no compare type parameter here, it is always
+ /// Compare::EQUAL. Results will correspond to an exact match on at
+ /// least one value in the list.
+ ///
+ /// \param[in] field section field to look up
+ /// \param[in] values values to look up
+ ///
+ /// \returns sorted list of unique indices that satisfy the lookup
+ ///
+ template<typename T>
+ IndexList IndicesMulti(const BarcodeLookupData::Field& field,
+ const std::vector<T>& values) const;
+
+ /// \}
+
+public:
+ /// \name Lookup Data Members
+ /// \{
+
+ // numeric comparisons make sense, keep key ordering preserved
+ OrderedLookup<int16_t> bcForward_;
+ OrderedLookup<int16_t> bcReverse_;
+ OrderedLookup<int8_t> bcQual_;
+
+ /// \}
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "internal/PbiLookupData.inl"
+
+#endif // PBILOOKUPDATA_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiRawData.h
+/// \brief Defines the classes used for working with raw PBI data.
+//
+// Author: Derek Barnett
+
+#ifndef PBIRAWDATA_H
+#define PBIRAWDATA_H
+
+#include "pbbam/Config.h"
+#include "pbbam/PbiFile.h"
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+class BamRecord;
+class DataSet;
+
+/// \brief The PbiRawBarcodeData class represents the raw data stored in the
+/// "BarcodeData" section of the PBI index.
+/// The data is exposed directly through three public vectors (see below).
+class PBBAM_EXPORT PbiRawBarcodeData
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty data structure.
+ PbiRawBarcodeData(void);
+
+ /// \brief Creates an empty data structure, preallocating space for a known
+ /// number of records (\p numReads).
+ PbiRawBarcodeData(uint32_t numReads);
+
+ PbiRawBarcodeData(const PbiRawBarcodeData& other);
+ PbiRawBarcodeData(PbiRawBarcodeData&& other);
+ PbiRawBarcodeData& operator=(const PbiRawBarcodeData& other);
+ PbiRawBarcodeData& operator=(PbiRawBarcodeData&& other);
+
+ /// \}
+
+public:
+ /// \name Index Construction
+ /// \{
+
+ /// \brief Adds a record's barcode data.
+ ///
+ /// \param[in] b %BAM record
+ ///
+ void AddRecord(const BamRecord& b);
+
+ /// \}
+
+public:
+ /// \name Raw Data Containers
+ /// \{
+
+ std::vector<int16_t> bcForward_;
+ std::vector<int16_t> bcReverse_;
+ std::vector<int8_t> bcQual_;
+
+ /// \}
+};
+
+/// \brief The PbiRawMappedData class represents the raw data stored in the
+/// "MappedData" section of the PBI index.
+///
+class PBBAM_EXPORT PbiRawMappedData
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty data structure.
+ PbiRawMappedData(void);
+
+ /// \brief Creates an empty data structure, preallocating space for a known
+ /// number of records (\p numReads).
+ PbiRawMappedData(uint32_t numReads);
+
+ PbiRawMappedData(const PbiRawMappedData& other);
+ PbiRawMappedData(PbiRawMappedData&& other);
+ PbiRawMappedData& operator=(const PbiRawMappedData& other);
+ PbiRawMappedData& operator=(PbiRawMappedData&& other);
+
+ /// \}
+
+public:
+ /// \name Index Construction
+ /// \{
+
+ /// \brief Adds a record's mapping data.
+ ///
+ /// \param[in] b %BAM record
+ ///
+ void AddRecord(const BamRecord& b);
+
+ /// \}
+
+public:
+ /// \name Index Data Query
+ /// \{
+
+ /// \brief Calculates the number of deleted bases for a particular record.
+ ///
+ /// Convenience method. Equivalent to:
+ /// \code{.cpp}
+ /// NumDeletedAndInsertedBasesAt(i).first;
+ /// \endcode
+ ///
+ /// \param[in] recordIndex i-th record
+ /// \returns number of deleted bases
+ ///
+ uint32_t NumDeletedBasesAt(size_t recordIndex) const;
+
+ /// \brief Calculates the number of inserted bases for a particular record.
+ ///
+ /// Convenience method. Equivalent to:
+ /// \code{.cpp}
+ /// NumDeletedAndInsertedBasesAt(i).second;
+ /// \endcode
+ ///
+ /// \param[in] recordIndex i-th record
+ /// \returns number of inserted bases
+ ///
+ uint32_t NumInsertedBasesAt(size_t recordIndex) const;
+
+ /// \brief Calculates the number of deleted & inserted bases for a
+ /// particular record.
+ ///
+ /// \param[in] recordIndex i-th record in the data set
+ /// \returns a pair consisting of (numDeletions,numInsertions)
+ ///
+ std::pair<uint32_t, uint32_t>
+ NumDeletedAndInsertedBasesAt(size_t recordIndex) const;
+
+ /// \}
+
+public:
+ /// \name Raw Data Containers
+ /// \{
+
+ std::vector<int32_t> tId_;
+ std::vector<uint32_t> tStart_;
+ std::vector<uint32_t> tEnd_;
+ std::vector<uint32_t> aStart_;
+ std::vector<uint32_t> aEnd_;
+ std::vector<uint8_t> revStrand_;
+ std::vector<uint32_t> nM_;
+ std::vector<uint32_t> nMM_;
+ std::vector<uint8_t> mapQV_;
+
+ /// \}
+};
+
+/// \brief The PbiReferenceEntry class represents a single reference in the PBI
+/// CoordinateSorted section.
+///
+/// A reference entry consists of an associated reference ID (tId), as well as
+/// start and end indices into the %BAM or PBI.
+///
+/// \note Rows are given in the interval [start, end).
+///
+class PBBAM_EXPORT PbiReferenceEntry
+{
+public:
+ typedef uint32_t ID; ///< type used for the reference ID (tId_)
+ typedef uint32_t Row; ///< type used for row indices (beginRow_, endRow_)
+
+public:
+ static const ID UNMAPPED_ID; ///< ID of a default-constructed entry
+ static const Row UNSET_ROW; ///< row value used when no rows are set
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates a default entry.
+ ///
+ /// - default ID: PbiReferenceEntry::UNMAPPED_ID \n
+ /// - default rows: PbiReferenceEntry::UNSET_ROW
+ ///
+ PbiReferenceEntry(void);
+
+ /// \brief Creates a reference entry, with no rows set.
+ ///
+ /// - default rows: PbiReferenceEntry::UNSET_ROW
+ ///
+ PbiReferenceEntry(ID id);
+
+ /// \brief Creates a reference entry, with rows set.
+ /// Per the class note, the rows describe the interval [beginRow, endRow).
+ PbiReferenceEntry(ID id, Row beginRow, Row endRow);
+
+ PbiReferenceEntry(const PbiReferenceEntry& other);
+ PbiReferenceEntry(PbiReferenceEntry&& other);
+ PbiReferenceEntry& operator=(const PbiReferenceEntry& other);
+ PbiReferenceEntry& operator=(PbiReferenceEntry&& other);
+
+ bool operator==(const PbiReferenceEntry& other) const;
+
+ /// \}
+
+public:
+ /// \name Reference Data Members
+ /// \{
+
+ ID tId_;
+ Row beginRow_;
+ Row endRow_;
+
+ /// \}
+};
+
+/// \brief The PbiRawReferenceData class represents the raw data stored in the
+/// "CoordinateSortedData" section of the PBI index.
+///
+class PBBAM_EXPORT PbiRawReferenceData
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty data structure.
+ PbiRawReferenceData(void);
+
+ /// \brief Creates an empty data structure, preallocating space for a
+ /// number of references.
+ ///
+ /// This constructor is recommended as this is the safest way to ensure that
+ /// references without observed mappings are included in the final output.
+ /// \param[in] numRefs number of references to preallocate space for
+ PbiRawReferenceData(uint32_t numRefs);
+
+ PbiRawReferenceData(const PbiRawReferenceData& other);
+ PbiRawReferenceData(PbiRawReferenceData&& other);
+ PbiRawReferenceData& operator=(const PbiRawReferenceData& other);
+ PbiRawReferenceData& operator=(PbiRawReferenceData&& other);
+
+ /// \}
+
+public:
+ /// \name Raw Data Containers
+ /// \{
+
+ std::vector<PbiReferenceEntry> entries_;
+
+ /// \}
+};
+
+/// \brief The PbiRawBasicData class represents the raw data stored in the
+/// "BasicData" section of the PBI index.
+///
+class PBBAM_EXPORT PbiRawBasicData
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty data structure.
+ PbiRawBasicData(void);
+
+ /// \brief Creates an empty data structure, preallocating space for a known
+ /// number of records (\p numReads).
+ PbiRawBasicData(uint32_t numReads);
+
+ PbiRawBasicData(const PbiRawBasicData& other);
+ PbiRawBasicData(PbiRawBasicData&& other);
+ PbiRawBasicData& operator=(const PbiRawBasicData& other);
+ PbiRawBasicData& operator=(PbiRawBasicData&& other);
+
+ /// \}
+
+public:
+ /// \name Index Construction
+ /// \{
+
+ /// \brief Adds a record's basic data.
+ ///
+ /// \param[in] b %BAM record
+ /// \param[in] offset \b virtual file offset where record begins
+ ///
+ void AddRecord(const BamRecord& b, int64_t offset);
+
+ /// \}
+
+public:
+ /// \name Raw Data Containers
+ /// \{
+
+ std::vector<int32_t> rgId_;
+ std::vector<int32_t> qStart_;
+ std::vector<int32_t> qEnd_;
+ std::vector<int32_t> holeNumber_;
+ std::vector<float> readQual_;
+ std::vector<uint8_t> ctxtFlag_;
+ std::vector<int64_t> fileOffset_;
+ std::vector<uint16_t> fileNumber_;
+
+ /// \}
+};
+
+/// \deprecated For legacy-code support only, and will be removed soon.
+/// Use PbiRawBasicData instead.
+/// This is a plain alias: both names refer to the same type.
+typedef PbiRawBasicData PbiRawSubreadData;
+
+/// \brief The PbiRawData class provides a representation of raw PBI index
+/// data, used mostly for construction or I/O.
+///
+/// The PbiRawData class itself provides access to a few high-level attributes
+/// (e.g. version, number of records, etc.). The actual index data is stored
+/// in its member components:
+/// PbiRawBasicData,
+/// PbiRawMappedData,
+/// PbiRawReferenceData, &
+/// PbiRawBarcodeData .
+///
+class PBBAM_EXPORT PbiRawData
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty raw data structure, ready for building.
+ ///
+ PbiRawData(void);
+
+ /// \brief Loads raw PBI data from a file.
+ ///
+ /// \param[in] pbiFilename ".pbi" filename
+ ///
+ /// \throws std::runtime_error if file contents cannot be loaded properly
+ ///
+ PbiRawData(const std::string& pbiFilename);
+
+ /// \brief Loads raw, aggregate PBI data from a dataset.
+ ///
+ /// This constructor creates a raw index object that contains an aggregation
+ /// of index data across the dataset.
+ ///
+ /// \note ReferenceData (the per-reference table for coordinate-sorted data)
+ /// is not currently available for the index aggregate. All other
+ /// per-record data sections will be present.
+ ///
+ /// \param[in] dataset DataSet object
+ ///
+ /// \throws std::runtime_error if file(s) contents cannot be loaded properly
+ ///
+ explicit PbiRawData(const DataSet& dataset);
+
+ PbiRawData(const PbiRawData& other);
+ PbiRawData(PbiRawData&& other);
+ PbiRawData& operator=(const PbiRawData& other);
+ PbiRawData& operator=(PbiRawData&& other);
+ ~PbiRawData(void);
+
+ /// \}
+
+public:
+ /// \name PBI General Attributes
+ /// \{
+
+ /// \returns true if index has BarcodeData section
+ bool HasBarcodeData(void) const;
+
+ /// \returns true if index has MappedData section
+ bool HasMappedData(void) const;
+
+ /// \returns true if index has ReferenceData section
+ bool HasReferenceData(void) const;
+
+ /// \returns true if index has \p section
+ /// \param[in] section PbiFile::Section identifier
+ ///
+ bool HasSection(const PbiFile::Section section) const;
+
+ /// \returns index filename ("*.pbi")
+ ///
+ /// \note Returns an empty string if the underlying data was calculated in
+ /// code or aggregated from a DataSet, rather than loaded from a
+ /// single PBI file.
+ ///
+ std::string Filename(void) const;
+
+ /// \returns enum flags representing the file sections present
+ PbiFile::Sections FileSections(void) const;
+
+ /// \returns the number of records in the PBI(s)
+ uint32_t NumReads(void) const;
+
+ /// \returns the PBI file's version
+ PbiFile::VersionEnum Version(void) const;
+
+ /// \}
+
+public:
+ /// \name Raw Data Components
+ /// \{
+
+ /// \returns const reference to BarcodeData raw data structure
+ ///
+ /// May be empty, check result of HasBarcodeData.
+ ///
+ const PbiRawBarcodeData& BarcodeData(void) const;
+
+ /// \returns const reference to BasicData raw data structure
+ const PbiRawBasicData& BasicData(void) const;
+
+ /// \returns const reference to MappedData raw data structure
+ ///
+ /// May be empty, check result of HasMappedData.
+ ///
+ const PbiRawMappedData& MappedData(void) const;
+
+ /// \returns const reference to ReferenceData raw data structure
+ ///
+ /// May be empty, check result of HasReferenceData.
+ ///
+ const PbiRawReferenceData& ReferenceData(void) const;
+
+ /// \}
+
+public:
+ /// \name PBI General Attributes
+ /// \{
+
+ /// \brief Sets the file section flags.
+ ///
+ /// \param[in] sections section flags
+ /// \returns reference to this index
+ ///
+ PbiRawData& FileSections(PbiFile::Sections sections);
+
+ /// \brief Sets the number of indexed records.
+ ///
+ /// \param[in] num number of records
+ /// \returns reference to this index
+ ///
+ PbiRawData& NumReads(uint32_t num);
+
+ /// \brief Sets PBI file version.
+ ///
+ /// \param[in] version file version
+ /// \returns reference to this index
+ ///
+ PbiRawData& Version(PbiFile::VersionEnum version);
+
+ /// \}
+
+public:
+ /// \name Raw Data Components
+ /// \{
+
+ /// \returns reference to BarcodeData raw data structure
+ ///
+ /// May be empty, check result of HasBarcodeData.
+ ///
+ PbiRawBarcodeData& BarcodeData(void);
+
+ /// \returns reference to BasicData raw data structure
+ PbiRawBasicData& BasicData(void);
+
+ /// \returns reference to MappedData raw data structure
+ ///
+ /// May be empty, check result of HasMappedData.
+ ///
+ PbiRawMappedData& MappedData(void);
+
+ /// \returns reference to ReferenceData raw data structure
+ ///
+ /// May be empty, check result of HasReferenceData.
+ ///
+ PbiRawReferenceData& ReferenceData(void);
+
+ /// \}
+
+private:
+ std::string filename_;
+ PbiFile::VersionEnum version_;
+ PbiFile::Sections sections_;
+ uint32_t numReads_;
+ PbiRawBarcodeData barcodeData_;
+ PbiRawMappedData mappedData_;
+ PbiRawReferenceData referenceData_;
+ PbiRawBasicData basicData_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/PbiRawData.inl"
+
+#endif // PBIRAWDATA_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Position.h
+/// \brief Defines the Position typedef.
+//
+// Author: Derek Barnett
+
+#ifndef POSITION_H
+#define POSITION_H
+
+#include "pbbam/Config.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief This type is used to refer to genomic positions.
+/// \typedef typedef int32_t PacBio::BAM::Position
+///
+/// We use a signed integer because SAM/BAM uses the -1 value to indicate
+/// unknown or unmapped positions.
+/// \sa UnmappedPosition
+typedef int32_t Position;
+
+/// \brief This constant is widely used as a "missing" or "invalid" position
+/// marker.
+/// Its value is -1. Being \c static at namespace scope, it has internal linkage.
+static const Position UnmappedPosition = Position(-1);
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // POSITION_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ProgramInfo.h
+/// \brief Defines the ProgramInfo class.
+//
+// Author: Derek Barnett
+
+#ifndef PROGRAMINFO_H
+#define PROGRAMINFO_H
+
+#include "pbbam/Config.h"
+#include <map>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The ProgramInfo class represents a program entry (\@PG) in the SAM
+/// header.
+///
+class PBBAM_EXPORT ProgramInfo
+{
+public:
+ /// \name Conversion & Validation
+ /// \{
+
+ /// \brief Creates a ProgramInfo object from SAM-formatted text.
+ ///
+ /// \param[in] sam SAM-formatted text
+ /// \returns program info object
+ ///
+ static ProgramInfo FromSam(const std::string& sam);
+
+ /// \brief Converts a ProgramInfo object to its SAM-formatted text.
+ ///
+ /// \param[in] prog input ProgramInfo object
+ /// \returns SAM-formatted text (no trailing newline)
+ ///
+ static std::string ToSam(const ProgramInfo& prog);
+
+ /// \}
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty program info object.
+ ProgramInfo(void);
+
+ /// \brief Creates a program info object with an ID.
+ ///
+ /// \param[in] id program ID (\@PG:ID)
+ ///
+ ProgramInfo(const std::string& id);
+
+
+ ProgramInfo(const ProgramInfo& other);
+ ProgramInfo(ProgramInfo&& other);
+ ProgramInfo& operator=(const ProgramInfo& other);
+ ProgramInfo& operator=(ProgramInfo&& other);
+ ~ProgramInfo(void);
+
+ /// \}
+
+public:
+ /// \name Conversion & Validation
+ /// \{
+
+ /// \returns true if program info is valid
+ ///
+ /// Currently this checks to see that ProgramInfo::Id does not contain an
+ /// empty string.
+ ///
+ bool IsValid(void) const;
+
+ /// \brief Converts this object to its SAM-formatted text.
+ ///
+ /// \returns SAM-formatted text (no trailing newline)
+ ///
+ std::string ToSam(void) const;
+
+ /// \}
+
+public:
+ /// \name Attributes
+ /// \{
+
+ /// \returns string value of \@PG:CL
+ std::string CommandLine(void) const;
+
+ /// \returns any non-standard tags added to the \@PG entry
+ ///
+ /// Result map consists of {tagName => value}.
+ ///
+ std::map<std::string, std::string> CustomTags(void) const;
+
+ /// \returns string value of \@PG:DS
+ std::string Description(void) const;
+
+ /// \returns string value of \@PG:ID
+ std::string Id(void) const;
+
+ /// \returns string value of \@PG:PN
+ std::string Name(void) const;
+
+ /// \returns string value of \@PG:PP
+ std::string PreviousProgramId(void) const;
+
+ /// \returns string value of \@PG:VN
+ std::string Version(void) const;
+
+ /// \}
+
+public:
+ /// \name Attributes
+ /// \{
+
+ /// \brief Sets the value for \@PG:CL
+ ///
+ /// \param[in] cmd new value
+ /// \returns reference to this object
+ ///
+ ProgramInfo& CommandLine(const std::string& cmd);
+
+ /// \brief Sets a new collection of non-standard tags.
+ ///
+ /// Custom tag map entries should consist of {tagName => value}.
+ ///
+ /// \param[in] custom new tags
+ /// \returns reference to this object
+ ///
+ ProgramInfo& CustomTags(const std::map<std::string, std::string>& custom);
+
+ /// \brief Sets the value for \@PG:DS
+ ///
+ /// \param[in] description new value
+ /// \returns reference to this object
+ ///
+ ProgramInfo& Description(const std::string& description);
+
+ /// \brief Sets the value for \@PG:ID
+ ///
+ /// \param[in] id new value
+ /// \returns reference to this object
+ ///
+ ProgramInfo& Id(const std::string& id);
+
+ /// \brief Sets the value for \@PG:PN
+ ///
+ /// \param[in] name new value
+ /// \returns reference to this object
+ ///
+ ProgramInfo& Name(const std::string& name);
+
+ /// \brief Sets the value for \@PG:PP
+ ///
+ /// \param[in] id new value
+ /// \returns reference to this object
+ ///
+ ProgramInfo& PreviousProgramId(const std::string& id);
+
+ /// \brief Sets the value for \@PG:VN
+ ///
+ /// \param[in] version new value
+ /// \returns reference to this object
+ ///
+ ProgramInfo& Version(const std::string& version);
+
+ /// \}
+
+private:
+ std::string commandLine_; // CL:<CommandLine>
+ std::string description_; // DS:<Description>
+ std::string id_; // ID:<ID> * must be unique for valid SAM *
+ std::string name_; // PN:<Name>
+ std::string previousProgramId_; // PP:<PreviousProgramID>
+ std::string version_; // VN:<Version>
+
+ // custom attributes
+ std::map<std::string, std::string> custom_; // tag => value
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/ProgramInfo.inl"
+
+#endif // PROGRAMINFO_H
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PulseBehavior.h
+/// \brief Defines the PulseBehavior enum.
+//
+// Author: Derek Barnett
+
+#ifndef PULSEBEHAVIOR_H
+#define PULSEBEHAVIOR_H
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief This enum defines the pulsecall modes supported by BamRecord tag
+/// accessors.
+/// Scoped enum: qualify enumerators (e.g. PulseBehavior::ALL). BASECALLS_ONLY has value 0.
+enum class PulseBehavior
+{
+ BASECALLS_ONLY ///< "Squashed" pulses not included, only basecalls.
+ , ALL ///< All pulses included.
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // PULSEBEHAVIOR_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file QNameQuery.h
+/// \brief Defines the QNameQuery class.
+//
+// Author: Derek Barnett
+
+#ifndef QNAMEQUERY_H
+#define QNAMEQUERY_H
+
+#include "pbbam/internal/QueryBase.h"
+#include <memory>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The QNameQuery class provides iterable access to a DataSet's records,
+/// with each iteration of the query returning a contiguous block of
+/// records that share a name.
+///
+/// There is no random-access here. It is simply a sequential read-through,
+/// grouping contiguous results that share a BamRecord::FullName.
+///
+/// \note The name is not ideal - but for legacy reasons, it will remain as-is
+/// for now. It will likely become something more explicit, like
+/// "SequentialQNameGroupQuery", so that the name "QNameQuery" will be
+/// available for a built-in query on a QNAME filter (or whitelist). This
+/// will make it more consistent with other queries (ReadAccuracyQuery,
+/// SubreadLengthQuery, ZmwQuery, etc).
+///
+class PBBAM_EXPORT QNameQuery : public internal::IGroupQuery
+{
+public:
+
+ /// \brief Creates a new QNameQuery.
+ ///
+ /// \param[in] dataset input data source(s)
+ ///
+ /// \throws std::runtime_error on failure to open/read underlying %BAM files
+ ///
+ QNameQuery(const DataSet& dataset);
+
+ /// \brief Destructor.
+ ///
+ /// Declared here rather than defaulted inline: std::unique_ptr needs the
+ /// complete QNameQueryPrivate type wherever the destructor is defined, so
+ /// the definition presumably lives in the source file.
+ ///
+ ~QNameQuery(void);
+
+public:
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ ///
+ /// \param[out] records receives the next group of records; per the class
+ /// description, a contiguous block sharing a name.
+ /// NOTE(review): whether the vector is cleared or
+ /// appended to is decided by the implementation, which
+ /// is not visible from this header - confirm.
+ ///
+ /// \returns presumably true if a group was stored in \p records, false
+ /// once the input is exhausted - confirm against implementation
+ ///
+ bool GetNext(std::vector<BamRecord>& records);
+
+private:
+ // Opaque implementation type (pimpl); only forward-declared in this header.
+ struct QNameQueryPrivate;
+ std::unique_ptr<QNameQueryPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // QNAMEQUERY_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file QualityValue.h
+/// \brief Defines the QualityValue class.
+//
+// Author: Derek Barnett
+
+#ifndef QUALITYVALUE_H
+#define QUALITYVALUE_H
+
+#include "pbbam/Config.h"
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The QualityValue class represents a FASTQ-compatible quality value.
+///
+/// Integers are clamped to [0, 93] (corresponding to ASCII printable chars
+/// [!-~]).
+///
+/// Use QualityValue::FromFastq for constructing entries from FASTQ encoding
+/// characters. Otherwise, the resulting QualityValue will be interpreted using
+/// the character's numeric value (ignoring the FASTQ offset of 33).
+///
+class PBBAM_EXPORT QualityValue
+{
+public:
+ /// Maximum quality value. Only declared here; the value is defined
+ /// out-of-line. Presumably 93, the upper clamp bound named in the class
+ /// description - TODO confirm against the definition.
+ static const uint8_t MAX;
+
+public:
+ /// \name Conversion Methods
+ /// \{
+
+ /// \brief Creates a QualityValue from a FASTQ-encoding character.
+ ///
+ /// \param[in] c FASTQ character
+ /// \returns quality value representing (c - 33)
+ ///
+ static QualityValue FromFastq(const char c);
+
+ /// \}
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates a QualityValue with specified value.
+ ///
+ /// This constructor is intentionally not marked explicit in this header,
+ /// so a uint8_t converts implicitly to a QualityValue. It also serves as
+ /// the default constructor (value 0).
+ ///
+ /// \param[in] value quality value
+ ///
+ QualityValue(const uint8_t value = 0);
+
+ /// \brief Copy constructor
+ ///
+ /// \param[in] other QualityValue to copy
+ ///
+ QualityValue(const QualityValue& other);
+
+ /// \brief Destructor
+ ~QualityValue(void);
+
+ /// \}
+
+public:
+ /// \name Conversion Methods
+ /// \{
+
+ /// \returns the FASTQ-encoding char for this QualityValue
+ char Fastq(void) const;
+
+ /// \returns the integer value of this QualityValue
+ ///
+ /// \note This is an implicit conversion operator (not marked explicit).
+ ///
+ operator uint8_t(void) const;
+
+ /// \}
+
+private:
+ uint8_t value_; // numeric quality value backing this object
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/QualityValue.inl"
+
+#endif // QUALITYVALUE_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file QualityValues.h
+/// \brief Defines the QualityValues class.
+//
+// Author: Derek Barnett
+
+#ifndef QUALITYVALUES_H
+#define QUALITYVALUES_H
+
+#include "pbbam/QualityValue.h"
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The QualityValues class represents a sequence of FASTQ-compatible
+/// quality values. See QualityValue documentation for more details.
+///
+class PBBAM_EXPORT QualityValues : public std::vector<QualityValue>
+{
+public:
+ /// \brief Creates a QualityValues object from a FASTQ-encoded string.
+ ///
+ /// \param[in] fastq FASTQ-encoded string
+ /// \returns corresponding QualityValues object
+ ///
+ static QualityValues FromFastq(const std::string& fastq);
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Default constructor - creates an empty QualityValues object.
+ QualityValues(void);
+
+ /// \brief Creates a QualityValues object from a FASTQ-encoded string.
+ ///
+ /// \param[in] fastqString FASTQ-encoded string
+ ///
+ explicit QualityValues(const std::string& fastqString);
+
+ /// \brief Creates a QualityValues object by copying a vector of
+ /// QualityValue elements.
+ ///
+ /// \param[in] quals vector of QualityValue elements
+ ///
+ explicit QualityValues(const std::vector<QualityValue>& quals);
+
+ /// \brief Creates a QualityValues object from an rvalue vector of
+ /// QualityValue elements.
+ ///
+ /// \note Unlike the const-reference overload, this constructor is not
+ /// marked explicit, so an rvalue std::vector<QualityValue> converts
+ /// implicitly.
+ ///
+ /// \param[in] quals vector of QualityValue elements (rvalue; presumably
+ /// moved from - confirm in QualityValues.inl)
+ ///
+ QualityValues(std::vector<QualityValue>&& quals);
+
+ /// \brief Creates a QualityValues object from a vector of (numeric) quality
+ /// values.
+ ///
+ /// \param[in] quals vector of quality value numbers
+ ///
+ explicit QualityValues(const std::vector<uint8_t>& quals);
+
+ /// \brief Creates a QualityValues object from the contents of the range:
+ /// [first, last)
+ ///
+ /// \param[in] first input iterator, whose element is a numeric quality
+ /// \param[in] last input iterator, whose element is a numeric quality
+ ///
+ QualityValues(const std::vector<uint8_t>::const_iterator first,
+ const std::vector<uint8_t>::const_iterator last);
+
+ /// \brief Creates a QualityValues object from the contents of the range:
+ /// [first, last)
+ ///
+ /// \param[in] first input iterator, whose element is a QualityValue
+ /// \param[in] last input iterator, whose element is a QualityValue
+ ///
+ QualityValues(const QualityValues::const_iterator first,
+ const QualityValues::const_iterator last);
+
+ /// \brief Copy constructor
+ QualityValues(const QualityValues& other);
+
+ /// \brief Move constructor
+ QualityValues(QualityValues&& other);
+
+ /// \brief Copy assignment operator
+ ///
+ /// \param[in] other QualityValues object
+ ///
+ QualityValues& operator=(const QualityValues& other);
+
+ /// \brief Move assignment operator
+ ///
+ /// \param[in] other QualityValues object
+ ///
+ QualityValues& operator=(QualityValues&& other);
+
+ /// \brief Copy assignment operator
+ ///
+ /// \param[in] quals vector of QualityValue elements
+ ///
+ QualityValues& operator=(const std::vector<QualityValue>& quals);
+
+ /// \brief Move assignment operator
+ ///
+ /// \param[in] quals vector of QualityValue elements
+ ///
+ QualityValues& operator=(std::vector<QualityValue>&& quals);
+
+ /// \brief Destructor
+ ///
+ /// \note std::vector's destructor is not virtual, so a QualityValues
+ /// object should not be deleted through a std::vector<QualityValue>*.
+ ///
+ ~QualityValues(void);
+
+ /// \}
+
+public:
+ /// \name Comparison Operators
+ /// \{
+
+ /// \brief Compares this sequence against a string.
+ ///
+ /// \param[in] other string to compare with; presumably FASTQ-encoded, to
+ /// match the std::string constructor - confirm in
+ /// QualityValues.inl
+ ///
+ bool operator==(const std::string& other) const;
+
+ /// \brief Negated counterpart of operator==(const std::string&).
+ ///
+ /// \param[in] other string to compare with (see operator==)
+ ///
+ bool operator!=(const std::string& other) const;
+
+ /// \}
+
+public:
+ /// \name Iterators
+ /// \{
+
+ /// \returns a const_iterator to the beginning of the sequence
+ std::vector<QualityValue>::const_iterator cbegin(void) const;
+
+ /// \returns a const_iterator to the element following the last element
+ std::vector<QualityValue>::const_iterator cend(void) const;
+
+ /// \returns a const_iterator to the beginning of the sequence
+ std::vector<QualityValue>::const_iterator begin(void) const;
+
+ /// \returns a const_iterator to the element following the last element
+ std::vector<QualityValue>::const_iterator end(void) const;
+
+ /// \returns an iterator to the beginning of the sequence
+ std::vector<QualityValue>::iterator begin(void);
+
+ /// \returns an iterator to the element following the last element
+ std::vector<QualityValue>::iterator end(void);
+
+ /// \}
+
+public:
+ /// \name Conversion Methods
+ /// \{
+
+ /// \returns the FASTQ-encoded string for this sequence of quality values
+ std::string Fastq(void) const;
+
+ /// \}
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/QualityValues.inl"
+
+#endif // QUALITYVALUES_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ReadAccuracyQuery.h
+/// \brief Defines the ReadAccuracyQuery class.
+//
+// Author: Derek Barnett
+
+#ifndef READACCURACYQUERY_H
+#define READACCURACYQUERY_H
+
+#include "pbbam/Accuracy.h"
+#include "pbbam/Compare.h"
+#include "pbbam/Config.h"
+#include "pbbam/internal/QueryBase.h"
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The ReadAccuracyQuery class provides iterable access to a DataSet's
+/// %BAM records, limiting results to those matching a read accuracy
+/// criterion.
+///
+/// Example:
+/// \include code/ReadAccuracyQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+/// Use BamFile::EnsurePacBioIndexExists before creating the query if one
+/// may not be present.
+///
+class PBBAM_EXPORT ReadAccuracyQuery : public internal::IQuery
+{
+public:
+
+ /// \brief Creates a new ReadAccuracyQuery, limiting record results to only
+ /// those matching a read accuracy criterion.
+ ///
+ /// \param[in] accuracy read accuracy value
+ /// \param[in] compareType compare operator
+ /// \param[in] dataset input data source(s)
+ ///
+ /// \sa BamRecord::ReadAccuracy
+ ///
+ /// \throws std::runtime_error on failure to open/read underlying %BAM or PBI
+ /// files.
+ ///
+ ReadAccuracyQuery(const Accuracy accuracy,
+ const Compare::Type compareType,
+ const DataSet& dataset);
+
+ /// \brief Destructor.
+ ///
+ /// Declared here rather than defaulted inline: std::unique_ptr needs the
+ /// complete ReadAccuracyQueryPrivate type wherever the destructor is
+ /// defined, so the definition presumably lives in the source file.
+ ///
+ ~ReadAccuracyQuery(void);
+
+public:
+
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ ///
+ /// \param[out] r receives the next record matching the accuracy criterion
+ ///
+ /// \returns presumably true if a record was stored in \p r, false once
+ /// the input is exhausted - confirm against implementation
+ ///
+ bool GetNext(BamRecord& r);
+
+private:
+ // Opaque implementation type (pimpl); only forward-declared in this header.
+ // NOTE(review): std::unique_ptr is used below, but this header includes
+ // <vector> and not <memory>; it relies on a transitive include. Consider
+ // including <memory> directly.
+ struct ReadAccuracyQueryPrivate;
+ std::unique_ptr<ReadAccuracyQueryPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // READACCURACYQUERY_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ReadGroupInfo.h
+/// \brief Defines the ReadGroupInfo class.
+//
+// Author: Derek Barnett
+
+#ifndef READGROUPINFO_H
+#define READGROUPINFO_H
+
+#include "pbbam/Config.h"
+#include "pbbam/exception/InvalidSequencingChemistryException.h"
+#include <map>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Enumerates the base features that the records of a read group may
+/// carry.
+///
+/// Which features are present is recorded in the read group's description
+/// (\@RG:DS).
+///
+enum class BaseFeature
+{
+ DELETION_QV,
+ DELETION_TAG,
+ INSERTION_QV,
+ MERGE_QV,
+ SUBSTITUTION_QV,
+ SUBSTITUTION_TAG,
+ IPD,
+ PULSE_WIDTH,
+ PKMID,
+ PKMEAN,
+ PKMID2,
+ PKMEAN2,
+ LABEL,
+ LABEL_QV,
+ ALT_LABEL,
+ ALT_LABEL_QV,
+ PULSE_MERGE_QV,
+ PULSE_CALL,
+ PRE_PULSE_FRAMES,
+ PULSE_CALL_WIDTH,
+ START_FRAME
+};
+
+/// \brief Enumerates the encodings used for frame data in the records of a
+/// read group.
+///
+/// The codec in use is recorded in the read group's description (\@RG:DS).
+///
+enum class FrameCodec
+{
+ RAW,
+ V1
+};
+
+/// \brief Enumerates the experimental designs of the barcodes in the records
+/// of a read group.
+///
+/// The mode in use is recorded in the read group's description (\@RG:DS).
+///
+enum class BarcodeModeType
+{
+ NONE,
+ SYMMETRIC,
+ ASYMMETRIC
+};
+
+/// \brief Enumerates the kinds of value that a barcode quality may encode in
+/// the records of a read group.
+///
+/// The kind in use is recorded in the read group's description (\@RG:DS).
+///
+enum class BarcodeQualityType
+{
+ NONE,
+ SCORE,
+ PROBABILITY
+};
+
+/// \brief Enumerates the instrument types / platform models that a read
+/// group's records may come from.
+///
+/// The model is recorded in the read group's platform model field
+/// (\@RG:PM).
+///
+enum class PlatformModelType
+{
+ ASTRO,
+ RS,
+ SEQUEL
+};
+
+/// \brief The ReadGroupInfo class represents a read group entry (\@RG) in the
+/// SAM header.
+///
+class PBBAM_EXPORT ReadGroupInfo
+{
+public:
+ /// \name Conversion & Validation
+ ///
+
+ /// \brief Creates a ReadGroupInfo object from SAM-formatted text.
+ ///
+ /// \param[in] sam SAM-formatted text
+ /// \returns read group info object
+ ///
+ static ReadGroupInfo FromSam(const std::string& sam);
+
+ /// \brief Converts a ReadGroupInfo object to its SAM-formatted text.
+ ///
+ /// \param[in] rg input ReadGroupInfo object
+ /// \returns SAM-formatted text (no trailing newline)
+ ///
+ static std::string ToSam(const ReadGroupInfo& rg);
+
+ /// \brief Converts a read group ID (string) to its numeric value.
+ ///
+ /// \param[in] rgId read group ID string
+ /// \returns numeric value of ID
+ ///
+ static int32_t IdToInt(const std::string& rgId);
+
+ /// \brief Converts a read group ID number to its string representation.
+ ///
+ /// \param[in] id read group ID number
+ /// \returns hexadecimal string representation of ID
+ ///
+ static std::string IntToId(const int32_t id);
+
+ /// \returns sequencing chemistry from (bindingKig, sequencingKit,
+ /// basecallerVersion)
+ ///
+ static std::string SequencingChemistryFromTriple(const std::string& bindingKit,
+ const std::string& sequencingKit,
+ const std::string& basecallerVersion);
+
+ /// \}
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty read group info object.
+ ReadGroupInfo(void);
+
+ /// \brief Creates a read group info object with an ID.
+ ///
+ /// \param[in] id string representation of read group ID
+ ///
+ ReadGroupInfo(const std::string& id);
+
+ /// \brief Creates a read group info object from a movie name & read type.
+ ///
+ /// \param[in] movieName sequencing movie name
+ /// \param[in] readType string version of record type
+ ///
+ /// \sa RecordType
+ ///
+ ReadGroupInfo(const std::string& movieName,
+ const std::string& readType);
+
+ /// \brief Creates a read group info object from a movie name, read type,
+ /// and platform model.
+ ///
+ /// \param[in] movieName sequencing movie name
+ /// \param[in] readType string version of record type
+ /// \param[in] platform platform model type
+ ///
+ /// \sa RecordType
+ ///
+ ReadGroupInfo(const std::string& movieName,
+ const std::string& readType,
+ const PlatformModelType platform);
+
+ ReadGroupInfo(const ReadGroupInfo& other);
+ ReadGroupInfo(ReadGroupInfo&& other);
+ ReadGroupInfo& operator=(const ReadGroupInfo& other);
+ ReadGroupInfo& operator=(ReadGroupInfo&& other);
+ ~ReadGroupInfo(void);
+
+ /// \}
+
+public:
+ /// \name Comparison Operators
+ /// \{
+
+ bool operator==(const ReadGroupInfo& other) const;
+
+ /// \}
+
+public:
+ /// \name Conversion & Validation
+ /// \{
+
+ /// \returns true if read group info is valid
+ ///
+ /// Currently this checks to see that ReadGroupInfo::Id does not contain an
+ /// empty string.
+ ///
+ bool IsValid(void) const;
+
+ /// \brief Converts this object to its SAM-formatted text.
+ ///
+ /// \returns SAM-formatted text (no trailing newline)
+ ///
+ std::string ToSam(void) const;
+
+ /// \}
+
+public:
+ /// \name Attributes
+ /// \{
+
+ /// \returns the number of barcode sequences in BarcodeFile
+ ///
+ /// \throws std::runtime_error if barcode data not set.
+ /// Check HasBarcodeData if this data may be absent.
+ ///
+ size_t BarcodeCount(void) const;
+
+ /// \returns name of FASTA file containing barcode sequences
+ ///
+ /// \throws std::runtime_error if barcode data not set.
+ /// Check HasBarcodeData if this data may be absent.
+ ///
+ std::string BarcodeFile(void) const;
+
+ /// \returns MD5 hash of the contents of BarcodeFile
+ ///
+ /// \throws std::runtime_error if barcode data not set.
+ /// Check HasBarcodeData if this data may be absent.
+ ///
+ std::string BarcodeHash(void) const;
+
+ /// \returns experimental design type of barcodes
+ ///
+ /// \throws std::runtime_error if barcode data not set.
+ /// Check HasBarcodeData if this data may be absent.
+ ///
+ BarcodeModeType BarcodeMode(void) const;
+
+ /// \returns type of value encoded in the 'bq' tag
+ ///
+ /// \throws std::runtime_error if barcode data is not set.
+ /// Check HasBarcodeData if this data may be absent.
+ ///
+ BarcodeQualityType BarcodeQuality(void) const;
+
+ /// \returns basecaller version number (e.g. "2.1")
+ std::string BasecallerVersion(void) const;
+
+ /// \returns tag name in use for the specified for base feature
+ std::string BaseFeatureTag(const BaseFeature& feature) const;
+
+ /// \returns binding kit part number (e.g. "100236500")
+ std::string BindingKit(void) const;
+
+ /// \returns true if reads are classified as spike-in controls
+ bool Control(void) const;
+
+ /// \returns any non-standard tags added to the \@PG entry
+ ///
+ /// Result map consists of {tagName => value}.
+ ///
+ std::map<std::string, std::string> CustomTags(void) const;
+
+ /// \returns string value of \@RG:DT
+ std::string Date(void) const;
+
+ /// \returns string value of \@RG:FO
+ std::string FlowOrder(void) const;
+
+ /// \returns frame rate in Hz
+ std::string FrameRateHz(void) const;
+
+ /// \returns true if read group has barcode data
+ bool HasBarcodeData(void) const;
+
+ /// \returns true if read group has an entry for the specified base feature
+ bool HasBaseFeature(const BaseFeature& feature) const;
+
+ /// \returns string value of \@RG:ID
+ std::string Id(void) const;
+
+ /// \returns codec type in use for IPD
+ FrameCodec IpdCodec(void) const;
+
+ /// \returns string value of \@RG:KS
+ std::string KeySequence(void) const;
+
+ /// \returns string value of \@RG:LB
+ std::string Library(void) const;
+
+ /// \returns movie name (stored in \@RG:PU)
+ std::string MovieName(void) const;
+
+ /// \returns string value of \@RG:PL
+ std::string Platform(void) const;
+
+ /// \returns string value of \@RG:PM
+ PlatformModelType PlatformModel(void) const;
+
+ /// \returns string value of \@RG:PI
+ std::string PredictedInsertSize(void) const;
+
+ /// \returns string value of \@RG:PG
+ std::string Programs(void) const;
+
+ /// \returns codec type in use for PulseWidth
+ FrameCodec PulseWidthCodec(void) const;
+
+ /// \returns string value of read type
+ std::string ReadType(void) const;
+
+ /// \returns string value of \@RG:SM
+ std::string Sample(void) const;
+
+ /// \returns string value of \@RG:CN
+ std::string SequencingCenter(void) const;
+
+ /// \returns sequencing chemistry name
+ std::string SequencingChemistry(void) const;
+
+ /// \returns sequencing kit part number
+ std::string SequencingKit(void) const;
+
+ /// \}
+
+public:
+ /// \name Attributes
+ /// \{
+
+ /// \brief Sets read group's barcode data.
+ ///
+ /// Barcode fields are either absent or all must be present.
+ ///
+ /// \param[in] barcodeFile barcode filename
+ /// \param[in] barcodeHash MD5 hash of barcode file
+ /// \param[in] barcodeCount number of records in barcode file
+ /// \param[in] barcodeMode experimental design of barcodes
+ /// \param[in] barcodeQuality type of barcode quality value
+ ///
+ /// \sa BarcodeFile \n
+ /// BarcodeHash \n
+ /// BarcodeCount \n
+ /// BarcodeMode \n
+ /// BarcodeQuality \n
+ /// ReadGroupInfo::ClearBarcodeData
+ ///
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& BarcodeData(const std::string& barcodeFile,
+ const std::string& barcodeHash,
+ size_t barcodeCount,
+ BarcodeModeType barcodeMode,
+ BarcodeQualityType barcodeQuality);
+
+ /// \brief Sets the basecaller version number.
+ ///
+ /// \param[in] versionNumber new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& BasecallerVersion(const std::string& versionNumber);
+
+ /// \brief Sets the tag to be used for a particular base feature.
+ ///
+ /// \param[in] feature feature type begin updated
+ /// \param[in] tag new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& BaseFeatureTag(const BaseFeature& feature,
+ const std::string& tag);
+
+ /// \brief Sets the binding kit part number.
+ ///
+ /// \param[in] kitNumber new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& BindingKit(const std::string& kitNumber);
+
+ /// \brief Removes all barcode data from this read group.
+ ///
+ /// \returns reference to this read group
+ ///
+ ReadGroupInfo& ClearBarcodeData(void);
+
+ /// \brief Removes all base features from this read group.
+ ///
+ /// \returns reference to this read group
+ ///
+ ReadGroupInfo& ClearBaseFeatures(void);
+
+ /// \brief Sets whether read group's records are classifed as spike-in
+ /// controls.
+ ///
+ /// \param[in] ctrl true if records are spike-in controls
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& Control(const bool ctrl);
+
+ /// \brief Sets a new collection of non-standard tags.
+ ///
+ /// Custom tag map entries should consist of {tagName => value}.
+ ///
+ /// \param[in] custom new tags
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& CustomTags(const std::map<std::string, std::string>& custom);
+
+ /// \brief Sets the value for \@RG:DT
+ ///
+ /// \param[in] date new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& Date(const std::string& date);
+
+ /// \brief Sets the value for \@RG:FO
+ ///
+ /// \param[in] order new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& FlowOrder(const std::string& order);
+
+ /// \brief Sets the frame rate.
+ ///
+ /// \param[in] frameRateHz string value of frame rate in Hz
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& FrameRateHz(const std::string& frameRateHz);
+
+ /// \brief Sets the read group's ID.
+ ///
+ /// \param[in] id string value of ID
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& Id(const std::string& id);
+
+ /// \brief Sets the read group's ID, from movie name & read type
+ ///
+ /// \param[in] movieName sequencing movie name
+ /// \param[in] readType string version of read type
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& Id(const std::string& movieName,
+ const std::string& readType);
+
+ /// \brief Sets the codec type used for IPD
+ ///
+ /// \param[in] codec codec type
+ /// \param[in] tag IPD tag
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& IpdCodec(const FrameCodec& codec,
+ const std::string& tag = std::string());
+
+ /// \brief Sets the value for \@RG:KS
+ ///
+ /// \param[in] sequence new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& KeySequence(const std::string& sequence);
+
+ /// \brief Sets the value for \@RG:LB
+ ///
+ /// \param[in] library new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& Library(const std::string& library);
+
+ /// \brief Sets the value for movie name (stored in \@RG:PU).
+ ///
+ /// \param[in] movieName new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& MovieName(const std::string& movieName);
+
+ /// \brief Sets the value for \@RG:PI
+ ///
+ /// \param[in] size new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& PredictedInsertSize(const std::string& size);
+
+ /// \brief Sets the value for \@RG:PG
+ ///
+ /// \param[in] programs new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& Programs(const std::string& programs);
+
+ /// \brief Sets the value for \@RG:PM
+ ///
+ /// \param[in] platformModel new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& PlatformModel(const PlatformModelType& platform);
+
+ /// \brief Sets the codec type used for PulseWidth
+ ///
+ /// \param[in] codec codec type
+ /// \param[in] tag pulse width tag
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& PulseWidthCodec(const FrameCodec& codec,
+ const std::string& tag = std::string());
+
+ /// \brief Sets the read type.
+ ///
+ /// \param[in] type new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& ReadType(const std::string& type);
+
+ /// \brief Removes a particular base feature from this read group.
+ ///
+ /// \param[in] feature feature to remove
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& RemoveBaseFeature(const BaseFeature& feature);
+
+ /// \brief Sets the value for \@RG:SM
+ ///
+ /// \param[in] sample new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& Sample(const std::string& sample);
+
+ /// \brief Sets the value for \@RG:CN
+ ///
+ /// \param[in] center new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& SequencingCenter(const std::string& center);
+
+ /// \brief Sets the sequencing kit part number.
+ ///
+ /// \param[in] kitNumber new value
+ /// \returns reference to this object
+ ///
+ ReadGroupInfo& SequencingKit(const std::string& kitNumber);
+
+ /// \}
+
+private:
+ std::string id_; // ID * must be unique for valid SAM *
+ std::string sequencingCenter_; // CN
+ std::string date_; // DT * (ISO-8601) *
+ std::string flowOrder_; // FO
+ std::string keySequence_; // KS
+ std::string library_; // LB
+ std::string programs_; // PG
+ std::string predictedInsertSize_; // PI
+ std::string movieName_; // PU
+ std::string sample_; // SM
+
+ PlatformModelType platformModel_; // PM
+
+ // DS:<Description> components
+ std::string readType_;
+ std::string bindingKit_;
+ std::string sequencingKit_;
+ std::string basecallerVersion_;
+ std::string frameRateHz_;
+ bool control_ = false;
+ FrameCodec ipdCodec_;
+ FrameCodec pulseWidthCodec_;
+ bool hasBarcodeData_ = false;
+ std::string barcodeFile_;
+ std::string barcodeHash_;
+ size_t barcodeCount_ = 0;
+ BarcodeModeType barcodeMode_ = BarcodeModeType::NONE;
+ BarcodeQualityType barcodeQuality_ = BarcodeQualityType::NONE;
+ std::map<BaseFeature, std::string> features_;
+
+ // custom attributes
+ std::map<std::string, std::string> custom_; // tag => value
+
+private:
+ // Produces the text for the \@RG description (DS) field. Presumably this
+ // serializes the "DS:<Description> components" members declared above -
+ // TODO confirm against the implementation.
+ std::string EncodeSamDescription(void) const;
+ // Counterpart of EncodeSamDescription(). Presumably parses \p description and
+ // stores the recovered values in the DS-related members - TODO confirm against
+ // the implementation.
+ void DecodeSamDescription(const std::string& description);
+};
+
+/// \brief Creates a read group ID from a movie name & read type.
+///
+/// \param[in] movieName sequencing movie name
+/// \param[in] readType string version of read type
+///
+/// \returns hexadecimal string read group ID
+///
+/// \sa ReadGroupInfo::ReadType
+///
+PBBAM_EXPORT
+std::string MakeReadGroupId(const std::string& movieName,
+ const std::string& readType);
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/ReadGroupInfo.inl"
+
+#endif // READGROUPINFO_H
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file RecordType.h
+/// \brief Defines the RecordType enum.
+//
+// Author: Derek Barnett
+
+#ifndef RECORDTYPE_H
+#define RECORDTYPE_H
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Enumerates the record types that may appear in a PacBio BAM file.
+///
+/// \sa ReadGroupInfo::ReadType
+///
+enum class RecordType
+{
+    ZMW,      ///< Polymerase read
+    HQREGION, ///< High-quality region
+    SUBREAD,  ///< Subread
+    CCS,      ///< Circular consensus sequence
+    SCRAP,    ///< Additional sequence (barcodes, adapters, etc.)
+    UNKNOWN,  ///< Unknown read type
+
+    /// \deprecated as of PacBio BAM spec v 3.0.4 (use RecordType::ZMW instead)
+    POLYMERASE = ZMW
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // RECORDTYPE_H
--- /dev/null
+// Copyright (c) 2014, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file SamTagCodec.h
+/// \brief Defines the SamTagCodec class.
+//
+// Author: Derek Barnett
+
+#ifndef SAMTAGCODEC_H
+#define SAMTAGCODEC_H
+
+#include "pbbam/Config.h"
+#include "pbbam/TagCollection.h"
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The SamTagCodec class converts %BAM tag data to and from its
+///        text-based (SAM) representation.
+///
+/// \note SamTagCodec is mostly an implementation and/or testing detail, and may
+///       be removed from the public API.
+///
+class PBBAM_EXPORT SamTagCodec
+{
+public:
+    /// \name Tag Collection Methods
+    /// \{
+
+    /// \brief Builds a TagCollection out of SAM-formatted tag data.
+    ///
+    /// \param[in] tagString    SAM-formatted string
+    /// \returns resulting tag collection
+    ///
+    static TagCollection Decode(const std::string& tagString);
+
+    /// \brief Builds a SAM-formatted string out of a TagCollection.
+    ///
+    /// \param[in] tags     TagCollection containing tag data
+    /// \returns SAM-formatted string
+    ///
+    static std::string Encode(const TagCollection& tags);
+
+    /// \}
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // SAMTAGCODEC_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file SamWriter.h
+/// \brief Defines the SamWriter class.
+//
+// Author: Derek Barnett
+
+#ifndef SAMWRITER_H
+#define SAMWRITER_H
+
+#include "pbbam/BamHeader.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/Config.h"
+#include "pbbam/IRecordWriter.h"
+#include <memory>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal { class SamWriterPrivate; }
+
+/// \brief The SamWriter class provides a writing interface for creating
+///        new SAM files.
+///
+/// \note The underlying buffered data may not be flushed to the file until the
+///       destructor is called. Trying to access the file (reading, stat-ing,
+///       indexing, etc.) before the SamWriter is destroyed yields undefined
+///       behavior. Enclose the SamWriter in some form of local scope (curly
+///       braces, a separate function, etc.) to ensure that its destructor is
+///       called before proceeding to read-based operations.
+///
+/// \code{.cpp}
+///  {
+///      SamWriter w(...);
+///      // write data
+///  }
+///  // now safe to access the new file
+/// \endcode
+///
+// Exported like the other public pbbam classes so that the symbols are visible
+// to clients of a shared-library build.
+class PBBAM_EXPORT SamWriter : public IRecordWriter
+{
+public:
+    /// \brief Opens a SAM file for writing & writes the header information.
+    ///
+    /// \note Set \p filename to "-" for stdout.
+    ///
+    /// \param[in] filename     path to output SAM file
+    /// \param[in] header       BamHeader object
+    ///
+    /// \throws std::runtime_error if there was a problem opening the file for
+    ///         writing or if an error occurred while writing the header
+    ///
+    SamWriter(const std::string& filename, const BamHeader& header);
+
+    /// \brief Fully flushes all buffered data & closes file.
+    ///
+    ~SamWriter(void);
+
+public:
+
+    /// \brief Try to flush any buffered data to file.
+    ///
+    /// \note The underlying implementation may not necessarily flush buffered
+    ///       data immediately, especially in a multithreaded writer situation.
+    ///       Let the SamWriter go out of scope to fully ensure flushing.
+    ///
+    /// \throws std::runtime_error if flush fails
+    ///
+    void TryFlush(void) override;
+
+    /// \brief Write a record to the output SAM file.
+    ///
+    /// \param[in] record BamRecord object
+    ///
+    /// \throws std::runtime_error on failure to write
+    ///
+    void Write(const BamRecord& record) override;
+
+    /// \brief Write a record to the output SAM file.
+    ///
+    /// \param[in] recordImpl BamRecordImpl object
+    ///
+    /// \throws std::runtime_error on failure to write
+    ///
+    void Write(const BamRecordImpl& recordImpl) override;
+
+private:
+    // Private implementation; the type is only forward-declared in this header.
+    std::unique_ptr<internal::SamWriterPrivate> d_;
+    DISABLE_MOVE_AND_COPY(SamWriter);
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // SAMWRITER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file SequenceInfo.h
+/// \brief Defines the SequenceInfo class.
+//
+// Author: Derek Barnett
+
+#ifndef SEQUENCEINFO_H
+#define SEQUENCEINFO_H
+
+#include "pbbam/Config.h"
+#include <map>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The SequenceInfo class represents a sequence entry (\@SQ) in the SAM
+/// header.
+///
+class PBBAM_EXPORT SequenceInfo
+{
+public:
+ /// \name Conversion & Validation
+ /// \{
+
+ /// \brief Creates a SequenceInfo object from SAM-formatted text.
+ ///
+ /// \param[in] sam SAM-formatted text
+ /// \returns sequence info object
+ ///
+ static SequenceInfo FromSam(const std::string& sam);
+
+ /// \brief Converts a SequenceInfo object to its SAM-formatted text.
+ ///
+ /// \param[in] seq input SequenceInfo object
+ /// \returns SAM-formatted text (no trailing newline)
+ ///
+ static std::string ToSam(const SequenceInfo& seq);
+
+ /// \}
+
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty sequence info object.
+ SequenceInfo(void);
+
+ /// \brief Creates a sequence info object with name & (optional) length.
+ ///
+ /// \param[in] name sequence name (\@SQ:SN)
+ /// \param[in] length sequence length (\@SQ:LN)
+ ///
+ SequenceInfo(const std::string& name,
+ const std::string& length = "0");
+
+ // Copy/move construction, copy/move assignment & destruction.
+ SequenceInfo(const SequenceInfo& other);
+ SequenceInfo(SequenceInfo&& other);
+ SequenceInfo& operator=(const SequenceInfo& other);
+ SequenceInfo& operator=(SequenceInfo&& other);
+ ~SequenceInfo(void);
+
+ /// \}
+
+public:
+ /// \name Operators
+ /// \{
+
+ bool operator==(const SequenceInfo& other) const;
+ bool operator!=(const SequenceInfo& other) const;
+
+ /// \}
+
+public:
+ /// \name Conversion & Validation
+ /// \{
+
+ /// \returns true if sequence info is valid
+ ///
+ /// Currently this checks to see that Name is non-empty and Length is within
+ /// the accepted range.
+ ///
+ bool IsValid(void) const;
+
+ /// \brief Converts this object to its SAM-formatted text.
+ ///
+ /// \returns SAM-formatted text (no trailing newline)
+ ///
+ std::string ToSam(void) const;
+
+ /// \}
+
+public:
+ /// \name Attributes
+ /// \{
+
+ /// \returns string value of \@SQ:AS
+ std::string AssemblyId(void) const;
+
+ /// \returns string value of \@SQ:M5
+ std::string Checksum(void) const;
+
+ /// \returns any non-standard tags added to the \@SQ entry
+ ///
+ /// Result map consists of {tagName => value}.
+ ///
+ std::map<std::string, std::string> CustomTags(void) const;
+
+ /// \returns string value of \@SQ:LN
+ std::string Length(void) const;
+
+ /// \returns string value of \@SQ:SN
+ std::string Name(void) const;
+
+ /// \returns string value of \@SQ:SP
+ std::string Species(void) const;
+
+ /// \returns string value of \@SQ:UR
+ std::string Uri(void) const;
+
+ /// \}
+
+public:
+ /// \name Attributes
+ /// \{
+
+ /// \brief Sets the value for \@SQ:AS
+ ///
+ /// \param[in] id new value
+ /// \returns reference to this object
+ ///
+ SequenceInfo& AssemblyId(const std::string& id);
+
+ /// \brief Sets the value for \@SQ:M5
+ ///
+ /// \param[in] checksum new value
+ /// \returns reference to this object
+ ///
+ SequenceInfo& Checksum(const std::string& checksum);
+
+ /// \brief Sets a new collection of non-standard tags.
+ ///
+ /// Custom tag map entries should consist of {tagName => value}.
+ ///
+ /// \param[in] custom new tags
+ /// \returns reference to this object
+ ///
+ SequenceInfo& CustomTags(const std::map<std::string, std::string>& custom);
+
+ /// \brief Sets the value for \@SQ:LN
+ ///
+ /// \param[in] length new value
+ /// \returns reference to this object
+ ///
+ SequenceInfo& Length(const std::string& length);
+
+ /// \brief Sets the value for \@SQ:SN
+ ///
+ /// \param[in] name new value
+ /// \returns reference to this object
+ ///
+ SequenceInfo& Name(const std::string& name);
+
+ /// \brief Sets the value for \@SQ:SP
+ ///
+ /// \param[in] species new value
+ /// \returns reference to this object
+ ///
+ SequenceInfo& Species(const std::string& species);
+
+ /// \brief Sets the value for \@SQ:UR
+ ///
+ /// \param[in] uri new value
+ /// \returns reference to this object
+ ///
+ SequenceInfo& Uri(const std::string& uri);
+
+ /// \}
+
+private:
+ // Standard \@SQ fields; each is held as text, including the length.
+ std::string name_; // SN:<Name> * must be unique for valid SAM *
+ std::string length_; // LN:<Length> * must be within [0 - 2^31-1] *
+ std::string assemblyId_; // AS:<AssemblyId>
+ std::string checksum_; // M5:<Checksum>
+ std::string species_; // SP:<Species>
+ std::string uri_; // UR:<URI>
+
+ // custom attributes
+ std::map<std::string, std::string> custom_; // tag => value
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/SequenceInfo.inl"
+
+#endif // SEQUENCEINFO_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Strand.h
+/// \brief Defines the Strand enum.
+//
+// Author: Derek Barnett
+
+#ifndef STRAND_H
+#define STRAND_H
+
+#include "pbbam/Config.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Enumerates the strand orientations used when reporting
+///        alignment-related information.
+///
+enum class Strand
+{
+    FORWARD, ///< Forward strand
+    REVERSE  ///< Reverse strand
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // STRAND_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file SubreadLengthQuery.h
+/// \brief Defines the SubreadLengthQuery class.
+//
+// Author: Derek Barnett
+
+#ifndef SUBREADLENGTHQUERY_H
+#define SUBREADLENGTHQUERY_H
+
+#include "pbbam/Compare.h"
+#include "pbbam/Config.h"
+#include "pbbam/internal/QueryBase.h"
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The SubreadLengthQuery class provides iterable access to a DataSet's
+/// %BAM records, limiting results to those matching a subread length
+/// criterion.
+///
+/// Example:
+/// \include code/SubreadLengthQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+/// Use BamFile::EnsurePacBioIndexExists before creating the query if one
+/// may not be present.
+///
+class PBBAM_EXPORT SubreadLengthQuery : public internal::IQuery
+{
+public:
+ /// \brief Creates a new SubreadLengthQuery, limiting record results to only
+ /// those matching a subread length criterion.
+ ///
+ /// \param[in] length subread length value
+ /// \param[in] compareType compare operator
+ /// \param[in] dataset input data source(s)
+ ///
+ /// \throws std::runtime_error on failure to open/read underlying %BAM or PBI
+ /// files.
+ ///
+ SubreadLengthQuery(const int32_t length,
+ const Compare::Type compareType,
+ const DataSet& dataset);
+
+ // Declared without a body here; d_ below refers to a type that is only
+ // forward-declared in this header.
+ ~SubreadLengthQuery(void);
+
+public:
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ ///
+ /// \param[out] r record object; presumably filled with the next matching
+ /// record - TODO confirm against the implementation
+ /// \returns presumably true if a record was retrieved, false when no
+ /// records remain - TODO confirm against the implementation
+ ///
+ // NOTE(review): presumably overrides a virtual in internal::IQuery; if so,
+ // consider marking it 'override' - confirm against QueryBase.h.
+ bool GetNext(BamRecord& r);
+
+private:
+ // Private implementation; the struct is only forward-declared here.
+ // NOTE(review): std::unique_ptr and int32_t are used in this header, but
+ // <memory> and <cstdint> are not included directly (and <vector>, which is
+ // included, is not used by these declarations) - presumably the needed
+ // headers arrive via the pbbam includes; confirm, or include them explicitly.
+ struct SubreadLengthQueryPrivate;
+ std::unique_ptr<SubreadLengthQueryPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // SUBREADLENGTHQUERY_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Tag.h
+/// \brief Defines the Tag class.
+//
+// Author: Derek Barnett
+
+#ifndef TAG_H
+#define TAG_H
+
+#include "pbbam/Config.h"
+#include <boost/variant.hpp>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Identifies the exact (C++) data type held by a Tag.
+///
+enum class TagDataType
+{
+    INVALID = 0,      ///< boost::blank
+    INT8,             ///< int8_t
+    UINT8,            ///< uint8_t
+    INT16,            ///< int16_t
+    UINT16,           ///< uint16_t
+    INT32 = 5,        ///< int32_t
+    UINT32,           ///< uint32_t
+    FLOAT,            ///< float
+    STRING,           ///< std::string
+    INT8_ARRAY,       ///< std::vector<int8_t>
+    UINT8_ARRAY = 10, ///< std::vector<uint8_t>
+    INT16_ARRAY,      ///< std::vector<int16_t>
+    UINT16_ARRAY,     ///< std::vector<uint16_t>
+    INT32_ARRAY,      ///< std::vector<int32_t>
+    UINT32_ARRAY,     ///< std::vector<uint32_t>
+    FLOAT_ARRAY = 15  ///< std::vector<float>
+};
+
+/// \brief Supplies extra instructions for interpreting a tag's value.
+///
+/// A single C++ data type (e.g. std::string) may stand for more than one BAM
+/// tag type ('H' vs 'Z'). A TagModifier states which of those tag types is
+/// meant when the C++ type alone cannot tell them apart.
+///
+enum class TagModifier
+{
+    /// \brief No modifier is set on the tag.
+    ///
+    NONE = 0,
+
+    /// \brief Marks an integer as ASCII.
+    ///
+    /// SAM/BAM treats an ASCII character as something distinct from an 8-bit
+    /// integer. C++ offers no such clean split - int8_t/uint8_t are likely
+    /// implemented as typedefs around char/unsigned char. Use this modifier to
+    /// state that a tag's value should be read as a printable, ASCII
+    /// character.
+    ///
+    ASCII_CHAR = 1,
+
+    /// \brief Marks std::string data as a "hex string" instead of a regular
+    ///        string.
+    ///
+    /// SAM/BAM distinguishes regular strings from "Hex format" strings, yet
+    /// both are handled in C++ through std::string. Use this modifier to state
+    /// that a tag's string data should be read as "Hex format" and not as a
+    /// regular, literal string.
+    ///
+    HEX_STRING = 2
+};
+
+/// \brief The Tag class represents a SAM/BAM record tag value.
+///
+/// SAM/BAM tags may store values from a variety of types: varying fixed-width
+/// integers, strings, arrays of data, etc.
+///
+/// The Tag class allow tags to be handled in a generic fashion, while
+/// maintaining a high level of type-safety. Only those types recognized by the
+/// SAM/BAM format are allowed, and extracting the value from a tag is subject
+/// to allowed conversion rules, as well.
+///
+// Inspired by (but greatly simplified & modified from) the boost::variant
+// wrapper approach taken by DynamicCpp (https://code.google.com/p/dynamic-cpp)
+//
+class PBBAM_EXPORT Tag
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates an empty, null tag
+ Tag(void);
+
+ /// \brief Creates a Tag from a signed 8-bit integer or character.
+ ///
+ /// Without a TagModifier, the resulting Tag will be annotated as containing
+ /// an 8-bit integer, whether the input \p value was an integer or a char.
+ /// For ASCII tags, use one of these methods:
+ /// \include code/Tag_AsciiCtor.txt
+ ///
+ Tag(int8_t value);
+
+ /// \brief Creates a Tag from a signed 8-bit integer or character,
+ /// applying the provided modifier.
+ ///
+ /// This method allows direct construction of an ASCII character, rather
+ /// than an 8-bit integer (e.g. Tag('A', TagModifier::ASCII_CHAR) ).
+ ///
+ /// \throws runtime_error if \p modifier is not valid for int8_t data
+ ///
+ Tag(int8_t value, const TagModifier mod);
+
+ /// \brief Creates a Tag from an unsigned 8-bit integer or character.
+ ///
+ /// Without a TagModifier, the resulting Tag will be annotated as containing
+ /// an 8-bit unsigned integer, whether the input \p value was an integer or
+ /// a char. For ASCII tags, use one of these methods:
+ /// \include code/Tag_AsciiCtor.txt
+ ///
+ Tag(uint8_t value);
+
+ /// \brief Creates a Tag from 16-bit integer.
+ Tag(int16_t value);
+
+ /// \brief Creates a Tag from 16-bit unsigned integer.
+ Tag(uint16_t value);
+
+ /// \brief Creates a Tag from 32-bit signed integer.
+ Tag(int32_t value);
+
+ /// \brief Creates a Tag from 32-bit unsigned integer.
+ Tag(uint32_t value);
+
+ /// \brief Creates a Tag from floating-point value.
+ Tag(float value);
+
+ /// \brief Creates a Tag from string data.
+ Tag(const std::string& value);
+
+ /// \brief Creates a Tag from string data, adding modifier.
+ ///
+ /// \throws runtime_error if \p modifier is not valid for string data
+ ///
+ Tag(const std::string& value, const TagModifier mod);
+
+ /// \brief Creates a Tag from a vector of 8-bit integers.
+ Tag(const std::vector<int8_t>& value);
+
+ /// \brief Creates a Tag from a vector of 8-bit unsigned integers.
+ Tag(const std::vector<uint8_t>& value);
+
+ /// \brief Creates a Tag from a vector of 16-bit integers.
+ Tag(const std::vector<int16_t>& value);
+
+ /// \brief Creates a Tag from a vector of 16-bit unsigned integers.
+ Tag(const std::vector<uint16_t>& value);
+
+ /// Constructs a Tag from a vector of 32-bit integers.
+ Tag(const std::vector<int32_t>& value);
+
+ /// \brief Creates a Tag from a vector of 32-bit unsigned integers.
+ Tag(const std::vector<uint32_t>& value);
+
+ /// \brief Creates a Tag from a vector of floating-point values.
+ Tag(const std::vector<float>& value);
+
+ Tag(const Tag& other);
+ Tag(Tag&& other);
+ ~Tag(void);
+
+ Tag& operator=(boost::blank value);
+ Tag& operator=(int8_t value);
+ Tag& operator=(uint8_t value);
+ Tag& operator=(int16_t value);
+ Tag& operator=(uint16_t value);
+ Tag& operator=(int32_t value);
+ Tag& operator=(uint32_t value);
+ Tag& operator=(float value);
+ Tag& operator=(const std::string& value);
+ Tag& operator=(const std::vector<int8_t>& value);
+ Tag& operator=(const std::vector<uint8_t>& value);
+ Tag& operator=(const std::vector<int16_t>& value);
+ Tag& operator=(const std::vector<uint16_t>& value);
+ Tag& operator=(const std::vector<int32_t>& value);
+ Tag& operator=(const std::vector<uint32_t>& value);
+ Tag& operator=(const std::vector<float>& value);
+ Tag& operator=(const Tag& other);
+ Tag& operator=(Tag&& other);
+
+ bool operator== (const Tag& other) const;
+ bool operator!= (const Tag& other) const;
+
+ /// \}
+
+public:
+ /// \name Data Conversion & Validation
+ /// \{
+
+ /// \brief Converts the tag value to an ASCII character.
+ ///
+ /// Tag must hold an integral type, within the valid ASCII range [33-127].
+ ///
+ /// \returns ASCII character value
+ /// \throws std::runtime_error if not ASCII-compatible
+ ///
+ char ToAscii(void) const;
+
+ /// \returns tag data as signed 8-bit (casting if needed)
+ /// \throws std::runtime_error if not integral data, or out of valid range
+ int8_t ToInt8(void) const;
+
+ /// \returns tag data as unsigned 8-bit (casting if needed)
+ /// \throws std::runtime_error if not integral data, or out of valid range
+ uint8_t ToUInt8(void) const;
+
+ /// \returns tag data as signed 16-bit (casting if needed)
+ /// \throws std::runtime_error if not integral data, or out of valid range
+ int16_t ToInt16(void) const;
+
+ /// \returns tag data as unsigned 16-bit (casting if needed)
+ /// \throws std::runtime_error if not integral data, or out of valid range
+ uint16_t ToUInt16(void) const;
+
+ /// \returns tag data as signed 32-bit (casting if needed)
+ /// \throws std::runtime_error if not integral data, or out of valid range
+ int32_t ToInt32(void) const;
+
+ /// \returns tag data as unsigned 32-bit (casting if needed)
+ /// \throws std::runtime_error if not integral data, or out of valid range
+ uint32_t ToUInt32(void) const;
+
+ /// \returns tag data as float
+ /// \throws std::runtime_error if tag does not contain a value of
+ /// explicit type: float
+ float ToFloat(void) const;
+
+ /// \returns tag data as std::string
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::string
+ std::string ToString(void) const;
+
+ /// \returns tag data as std::vector<int8_t>
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::vector<int8_t>
+ std::vector<int8_t> ToInt8Array(void) const;
+
+ /// \returns tag data as std::vector<uint8_t>
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::vector<uint8_t>
+ std::vector<uint8_t> ToUInt8Array(void) const;
+
+ /// \returns tag data as std::vector<int16_t>
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::vector<int16_t>
+ std::vector<int16_t> ToInt16Array(void) const;
+
+ /// \returns tag data as std::vector<uint16_t>
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::vector<uint16_t>
+ std::vector<uint16_t> ToUInt16Array(void) const;
+
+ /// \returns tag data as std::vector<int32_t>
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::vector<int32_t>
+ std::vector<int32_t> ToInt32Array(void) const;
+
+ /// \returns tag data as std::vector<uint32_t>
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::vector<uint32_t>
+ std::vector<uint32_t> ToUInt32Array(void) const;
+
+ /// \returns tag data as std::vector<float>
+ /// \throws std::runtime_error if tag does not contain a value of explicit
+ /// type: std::vector<float>
+ std::vector<float> ToFloatArray(void) const;
+
+ /// \}
+
+public:
+
+ /// \name Data Type Queries
+ /// \{
+
+ /// \returns true if tag is null (e.g. default-constructed)
+ bool IsNull(void) const;
+
+ /// \returns true if tag contains a value of type: int8_t
+ bool IsInt8(void) const;
+
+ /// \returns true if tag contains a value of type: uint8_t
+ bool IsUInt8(void) const;
+
+ /// \returns true if tag contains a value of type: int16_t
+ bool IsInt16(void) const;
+
+ /// \returns true if tag contains a value of type: uint16_t
+ bool IsUInt16(void) const;
+
+ /// \returns true if tag contains a value of type: int32_t
+ bool IsInt32(void) const;
+
+ /// \returns true if tag contains a value of type: uint32_t
+ bool IsUInt32(void) const;
+
+ /// \returns true if tag contains a value of type: float
+ bool IsFloat(void) const;
+
+ /// \returns true if tag contains a value of type: std::string
+ bool IsString(void) const;
+
+ /// \returns true if tag contains a value of type: std::string \b AND has a
+ /// TagModifier of TagModifier::HEX_STRING
+ bool IsHexString(void) const;
+
+ /// \returns true if tag contains a value of type: std::vector<int8_t>
+ bool IsInt8Array(void) const;
+
+ /// \returns true if tag contains a value of type: std::vector<uint8_t>
+ bool IsUInt8Array(void) const;
+
+ /// \returns true if tag contains a value of type: std::vector<int16_t>
+ bool IsInt16Array(void) const;
+
+ /// \returns true if tag contains a value of type: std::vector<uint16_t>
+ bool IsUInt16Array(void) const;
+
+ /// \returns true if tag contains a value of type: std::vector<int32_t>
+ bool IsInt32Array(void) const;
+
+ /// \returns true if tag contains a value of type: std::vector<uint32_t>
+ bool IsUInt32Array(void) const;
+
+ /// \returns true if tag contains a value of type: std::vector<float>
+ bool IsFloatArray(void) const;
+
+ /// \returns true if tag contains a value with any signed integer type
+ bool IsSignedInt(void) const;
+
+ /// \returns true if tag contains a value with any unsigned integer type
+ bool IsUnsignedInt(void) const;
+
+ /// \returns true if tag contains a value with any integer type
+ bool IsIntegral(void) const;
+
+ /// \returns true if tag contains a value with any integer or float type
+ bool IsNumeric(void) const;
+
+ /// \returns true if tag contains a vector containing signed integers
+ bool IsSignedArray(void) const;
+
+ /// \returns true if tag contains a vector containing unsigned integers
+ bool IsUnsignedArray(void) const;
+
+ /// \returns true if tag contains a vector containing integers
+ bool IsIntegralArray(void) const;
+
+ /// \returns true if tag contains a vector (integers or floats)
+ bool IsArray(void) const;
+
+ /// \}
+
+public:
+ /// \name Type & Modifier Attributes
+ /// \{
+
+ /// \returns enum value for current tag data
+ TagDataType Type(void) const;
+
+ /// \returns printable type name for current tag data
+ std::string Typename(void) const;
+
+ /// \returns true if tag data modifier \p m is set
+ bool HasModifier(const TagModifier m) const;
+
+ /// \returns current tag data modifier
+ TagModifier Modifier(void) const;
+
+ /// \brief Sets tag data modifier.
+ ///
+ /// \param[in] m new modifier value
+ ///
+ /// \returns reference to this tag
+ Tag& Modifier(const TagModifier m);
+
+ /// \}
+
+private :
+ // NOTE - keep this synced with TagDataType enum ordering
+ typedef boost::variant<boost::blank, // <-- default constructor creates variant of this type
+ int8_t,
+ uint8_t,
+ int16_t,
+ uint16_t,
+ int32_t,
+ uint32_t,
+ float,
+ std::string,
+ std::vector<int8_t>,
+ std::vector<uint8_t>,
+ std::vector<int16_t>,
+ std::vector<uint16_t>,
+ std::vector<int32_t>,
+ std::vector<uint32_t>,
+ std::vector<float> > var_t;
+
+ var_t data_;
+ TagModifier modifier_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/Tag.inl"
+
+#endif // TAG_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file TagCollection.h
+/// \brief Defines the TagCollection class.
+//
+// Author: Derek Barnett
+
+#ifndef TAGCOLLECTION_H
+#define TAGCOLLECTION_H
+
+#include "pbbam/Config.h"
+#include "pbbam/Tag.h"
+#include <map>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The TagCollection class represents a collection (or "dictionary") of
+/// tags.
+///
+/// Tags are mapped to their tag name, a 2-character string.
+///
+/// \note TagCollection publicly derives from std::map<std::string, Tag>, so
+/// the complete std::map interface is available on it. std::map has no
+/// virtual destructor; do not delete a TagCollection through a pointer
+/// to its std::map base.
+///
+class PBBAM_EXPORT TagCollection : public std::map<std::string, Tag>
+{
+public:
+ /// \brief Checks whether a tag is stored under the given name.
+ ///
+ /// \param[in] name tag name to look up
+ /// \returns true if the collection contains a tag with \p name
+ bool Contains(const std::string& name) const;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // TAGCOLLECTION_H
--- /dev/null
+// Copyright (c) 2014, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef UNMAPPEDREADSQUERY_H
+#define UNMAPPEDREADSQUERY_H
+
+// TODO: Not yet implemented.
+// Can't get bam_itr_queryi(idx, HTS_IDX_NOCOOR, -1, -1) to work reliably at the moment.
+
+//#include "pbbam/QueryBase.h"
+//#include <string>
+
+//namespace PacBio {
+//namespace BAM {
+
+//class BamFile;
+
+//class PBBAM_EXPORT UnmappedReadsQuery : public QueryBase
+//{
+//public:
+// UnmappedReadsQuery(const BamFile& file);
+
+//protected:
+// bool GetNext(BamRecord& record);
+
+//private:
+// std::shared_ptr<samFile> file_;
+// std::shared_ptr<bam_hdr_t> header_;
+// std::shared_ptr<hts_idx_t> index_;
+// std::shared_ptr<hts_itr_t> iterator_;
+//};
+
+//} // namespace BAM
+//} // namespace PacBio
+
+#endif // UNMAPPEDREADSQUERY_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Validator.h
+/// \brief Defines the Validator class.
+//
+// Author: Derek Barnett
+
+#ifndef VALIDATOR_H
+#define VALIDATOR_H
+
+#include "pbbam/Config.h"
+#include "pbbam/exception/ValidationException.h"
+#include <limits>
+
+namespace PacBio {
+namespace BAM {
+
+class BamFile;
+class BamHeader;
+class BamRecord;
+class ReadGroupInfo;
+
+/// \brief The Validator class provides validation for %BAM data.
+///
+/// There are 2 ways to use this class. If you are only concerned with a quick &
+/// dirty, yes/no validation, then you can use the IsValid() methods. This will
+/// swallow the specific cause of the failure, but you don't have to catch an
+/// exception and handle it in your client code. If you want to know,
+/// specifically, what failed, then you can use the Validate*() methods that
+/// will throw a ValidationException if the object is invalid. This exception
+/// will provide more details as to what failed and why.
+///
+/// See documentation for Config.h for details on building pbbam with
+/// auto-validation enabled.
+///
+class PBBAM_EXPORT Validator
+{
+public:
+ /// \brief Checks that a %BAM file conforms to the %PacBio specification.
+ ///
+ /// When \p entireFile is false, this method only checks file metadata. If
+ /// \p entireFile is true, all records are checked as well.
+ ///
+ /// \param[in] file %BAM file to validate
+ /// \param[in] entireFile check records in addition to metadata
+ /// \returns true if \p file passes validation checks
+ ///
+ /// \sa Validator::ValidateFileMetadata, Validator::ValidateEntireFile
+ ///
+ static bool IsValid(const BamFile& file, const bool entireFile);
+
+ /// \brief Checks that a %BAM header conforms to the %PacBio specification.
+ ///
+ /// \param[in] header %BAM header to validate
+ /// \returns true if \p header passes validation checks
+ ///
+ /// \sa Validator::Validate(const BamHeader& header)
+ ///
+ static bool IsValid(const BamHeader& header);
+
+ /// \brief Checks that a %BAM read group conforms to the %PacBio
+ /// specification.
+ ///
+ /// \param[in] rg %BAM read group to validate
+ /// \returns true if \p rg passes validation checks
+ ///
+ /// \sa Validator::Validate(const ReadGroupInfo& rg)
+ ///
+ static bool IsValid(const ReadGroupInfo& rg);
+
+ /// \brief Checks that a %BAM record conforms to the %PacBio specification.
+ ///
+ /// \param[in] record %BAM record to validate
+ /// \returns true if \p record passes validation checks
+ ///
+ /// \sa Validator::Validate(const BamRecord& record)
+ ///
+ static bool IsValid(const BamRecord& record);
+
+public:
+ /// \brief Checks that a %BAM file's header conforms to the
+ /// %PacBio specification.
+ ///
+ /// This validation step checks the SAM/%BAM version number, sort order,
+ /// PacBioBAM version number, and calls Validate(readGroup) internally for
+ /// all read groups.
+ ///
+ /// \param[in] header %BAM header to validate
+ /// \param[in] maxErrors maximum number of errors to allow before throwing
+ ///
+ /// \throws ValidationException if \p header fails validation checks
+ ///
+ static void Validate(const BamHeader& header,
+ const size_t maxErrors = std::numeric_limits<size_t>::max());
+
+ /// \brief Checks that a %BAM read group conforms to the %PacBio
+ /// specification.
+ ///
+ /// \param[in] rg %BAM read group to validate
+ /// \param[in] maxErrors maximum number of errors to allow before throwing
+ ///
+ /// \throws ValidationException if \p rg fails validation checks
+ ///
+ static void Validate(const ReadGroupInfo& rg,
+ const size_t maxErrors = std::numeric_limits<size_t>::max());
+
+ /// \brief Checks that a %BAM record conforms to the %PacBio specification.
+ ///
+ /// \param[in] record %BAM record to validate
+ /// \param[in] maxErrors maximum number of errors to allow before throwing
+ ///
+ /// \throws ValidationException if \p record fails validation checks
+ ///
+ static void Validate(const BamRecord& record,
+ const size_t maxErrors = std::numeric_limits<size_t>::max());
+
+ /// \brief Checks that a %BAM file's (entire) contents conform to the
+ /// %PacBio specification.
+ ///
+ /// This is equivalent to:
+ ///
+ /// \code
+ /// Validator::ValidateFileMetadata(file);
+ /// EntireFileQuery query(file);
+ /// for (const BamRecord& record : query)
+ /// Validator::Validate(record);
+ /// \endcode
+ ///
+ /// \param[in] file %BAM file to validate
+ /// \param[in] maxErrors maximum number of errors to allow before throwing
+ ///
+ /// \throws ValidationException if \p file fails validation checks
+ ///
+ static void ValidateEntireFile(const BamFile& file,
+ const size_t maxErrors = std::numeric_limits<size_t>::max());
+
+ /// \brief Checks that a %BAM file's metadata conforms to the
+ /// %PacBio specification.
+ ///
+ /// This validation step checks the filename, ensures EOF marker, and
+ /// presence of PBI. It also calls Validate(file.Header()) internally.
+ ///
+ /// \param[in] file %BAM file to validate
+ /// \param[in] maxErrors maximum number of errors to allow before throwing
+ ///
+ /// \throws ValidationException if \p file fails validation checks
+ ///
+ static void ValidateFileMetadata(const BamFile& file,
+ const size_t maxErrors = std::numeric_limits<size_t>::max());
+
+private:
+ // Every member is static; the deleted constructor prevents instantiation.
+ Validator(void) = delete;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#include "internal/Validator.inl"
+
+#endif // VALIDATOR_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwGroupQuery.h
+/// \brief Defines the ZmwGroupQuery class.
+//
+// Author: Derek Barnett
+
+#ifndef ZMWGROUPQUERY_H
+#define ZMWGROUPQUERY_H
+
+#include "pbbam/internal/QueryBase.h"
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The ZmwGroupQuery class provides iterable access to a DataSet's
+/// %BAM records, limiting results to those matching a ZMW hole number
+/// whitelist, and grouping those results by hole number.
+///
+/// Example:
+/// \include code/ZmwGroupQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+/// Use BamFile::EnsurePacBioIndexExists before creating the query if one
+/// may not be present.
+///
+class PBBAM_EXPORT ZmwGroupQuery : public internal::IGroupQuery
+{
+public:
+ /// \brief Creates a new ZmwGroupQuery, limiting record results to only
+ /// those matching a ZMW hole number criterion.
+ ///
+ /// \param[in] zmwWhitelist vector of allowed ZMW hole numbers
+ /// \param[in] dataset input data source(s)
+ ///
+ /// \throws std::runtime_error on failure to open/read underlying %BAM or
+ /// PBI files.
+ ///
+ ZmwGroupQuery(const std::vector<int32_t>& zmwWhitelist,
+ const DataSet& dataset);
+
+ // Declared here and defined out of line: d_ holds a type that is
+ // incomplete in this header, so the destructor cannot be generated inline.
+ ~ZmwGroupQuery(void);
+
+public:
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ ///
+ /// \param[out] records receives the next group of records
+ /// \returns presumably true while a group was fetched and false once the
+ /// query is exhausted - TODO confirm against the implementation
+ ///
+ bool GetNext(std::vector<BamRecord>& records);
+
+private:
+ // Opaque implementation state; ZmwGroupQueryPrivate is only
+ // forward-declared in this header.
+ struct ZmwGroupQueryPrivate;
+ std::unique_ptr<ZmwGroupQueryPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // ZMWGROUPQUERY_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwQuery.h
+/// \brief Defines the ZmwQuery class.
+//
+// Author: Derek Barnett
+
+#ifndef ZMWQUERY_H
+#define ZMWQUERY_H
+
+#include "pbbam/Config.h"
+#include "pbbam/internal/QueryBase.h"
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The ZmwQuery class provides iterable access to a DataSet's
+/// %BAM records, limiting results to those matching a ZMW hole number
+/// whitelist.
+///
+/// Example:
+/// \include code/ZmwQuery.txt
+///
+/// \note Currently, all %BAM files must have a corresponding ".pbi" index file.
+/// Use BamFile::EnsurePacBioIndexExists before creating the query if one
+/// may not be present.
+///
+class PBBAM_EXPORT ZmwQuery : public internal::IQuery
+{
+public:
+ /// \brief Creates a new ZmwQuery, limiting record results to only
+ /// those matching a ZMW hole number criterion.
+ ///
+ /// \param[in] zmwWhitelist vector of allowed ZMW hole numbers
+ /// \param[in] dataset input data source(s)
+ ///
+ /// \throws std::runtime_error on failure to open/read underlying %BAM or
+ /// PBI files.
+ ///
+ ZmwQuery(const std::vector<int32_t>& zmwWhitelist,
+ const DataSet& dataset);
+
+ // Declared here and defined out of line: d_ holds a type that is
+ // incomplete in this header, so the destructor cannot be generated inline.
+ ~ZmwQuery(void);
+
+public:
+ /// \brief Main iteration point for record access.
+ ///
+ /// Most client code should not need to use this method directly. Use
+ /// iterators instead.
+ ///
+ /// \param[out] r receives the next record
+ /// \returns presumably true while a record was fetched and false once the
+ /// query is exhausted - TODO confirm against the implementation
+ ///
+ bool GetNext(BamRecord& r);
+
+private:
+ // Opaque implementation state; ZmwQueryPrivate is only forward-declared
+ // in this header.
+ struct ZmwQueryPrivate;
+ std::unique_ptr<ZmwQueryPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // ZMWQUERY_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwType.h
+/// \brief Defines the ZmwType enum.
+//
+// Author: Armin Töpfer
+
+#ifndef ZMWTYPE_H
+#define ZMWTYPE_H
+
+#include "pbbam/Config.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief This enum defines the different ZMW categories of scraps
+///
+/// The underlying type is char, and each enumerator's value is the
+/// single-character code shown beside it.
+///
+enum class ZmwType : char
+{
+ CONTROL = 'C',
+ MALFORMED = 'M',
+ NORMAL = 'N',
+ SENTINEL = 'S'
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // ZMWTYPE_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwTypeMap.h
+/// \brief Defines the ZmwTypeMap class.
+//
+// Author: Armin Töpfer
+
+#ifndef ZMWTYPEMAP_H
+#define ZMWTYPEMAP_H
+
+#include <map>
+
+#include "pbbam/Config.h"
+#include "pbbam/ZmwType.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The ZmwTypeMap class provides mapping between char codes and
+/// ZmwType enum keys.
+///
+class ZmwTypeMap
+{
+public:
+ /// Lookup table from a char code to its ZmwType value. Only the
+ /// declaration appears in this header; the definition and its contents
+ /// are provided elsewhere.
+ ///
+ /// NOTE(review): this is a public, non-const static, so any caller can
+ /// modify the table - confirm that is intended.
+ static std::map<char, ZmwType> ParseChar;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // ZMWTYPEMAP_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file InvalidSequencingChemistryException.h
+/// \brief Defines the InvalidSequencingChemistryException class.
+//
+// Author: Derek Barnett
+
+#ifndef INVALIDSEQUENCINGCHEMISTRYEXCEPTION_H
+#define INVALIDSEQUENCINGCHEMISTRYEXCEPTION_H
+
+#include <exception>
+#include <sstream>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The InvalidSequencingChemistryException class represents an exception
+///        that will be thrown when an invalid sequencing chemistry combination
+///        is encountered.
+///
+/// The binding kit, sequencing kit, and basecaller version passed to the
+/// constructor are stored and remain available through the accessors below.
+/// The text returned by what() is composed once, in the constructor.
+///
+class InvalidSequencingChemistryException : public std::exception
+{
+public:
+    /// \brief Stores the three chemistry values and composes the what() message.
+    ///
+    /// \param[in] bindingKit         binding kit value to report
+    /// \param[in] sequencingKit      sequencing kit value to report
+    /// \param[in] basecallerVersion  basecaller version value to report
+    ///
+    InvalidSequencingChemistryException(const std::string& bindingKit,
+                                        const std::string& sequencingKit,
+                                        const std::string& basecallerVersion)
+        : bindingKit_(bindingKit)
+        , sequencingKit_(sequencingKit)
+        , basecallerVersion_(basecallerVersion)
+    {
+        // '\n' produces the same text as std::endl; flushing a string stream
+        // serves no purpose.
+        std::stringstream s;
+        s << "unsupported sequencing chemistry combination: " << '\n'
+          << " binding kit: " << bindingKit_ << '\n'
+          << " sequencing kit: " << sequencingKit_ << '\n'
+          << " basecaller version: " << basecallerVersion_ << '\n';
+        what_ = s.str();
+    }
+
+    // This is a work around for the Intel PHI compiler (icpc): the destructor
+    // is declared explicitly as non-throwing. It is spelled 'noexcept', like
+    // what() below, because the equivalent 'throw()' form is deprecated.
+    ~InvalidSequencingChemistryException() noexcept
+    {
+
+    }
+public:
+    /// \returns the binding kit value given at construction
+    const std::string& BindingKit(void) const
+    { return bindingKit_; }
+
+    /// \returns the sequencing kit value given at construction
+    const std::string& SequencingKit(void) const
+    { return sequencingKit_; }
+
+    /// \returns the basecaller version value given at construction
+    const std::string& BasecallerVersion(void) const
+    { return basecallerVersion_; }
+
+public:
+    /// \returns the message composed in the constructor; the pointer refers to
+    ///          storage owned by this exception object
+    const char* what(void) const noexcept override
+    { return what_.c_str(); }
+
+protected:
+    std::string bindingKit_;
+    std::string sequencingKit_;
+    std::string basecallerVersion_;
+    std::string what_;   // message returned by what()
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // INVALIDSEQUENCINGCHEMISTRYEXCEPTION_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ValidationException.h
+/// \brief Defines the ValidationException class.
+//
+// Author: Derek Barnett
+
+#ifndef VALIDATIONEXCEPTION_H
+#define VALIDATIONEXCEPTION_H
+
+#include <map>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The ValidationException represents an exception that will be thrown
+///        when any error is encountered using the Validator API. In addition to
+///        a default display message, it provides programmatic access to all
+///        reported error messages.
+///
+/// \sa Validator::Validate(const BamRecord& record)
+///
+class ValidationException : public std::runtime_error
+{
+public:
+    /// A list of error messages.
+    typedef std::vector<std::string> ErrorList;
+
+    /// Error lists grouped under a string key.
+    /// NOTE(review): the meaning of the key (file name, read group ID, record
+    /// name, ...) is set by the implementation, which is not in this header.
+    typedef std::map<std::string, ErrorList> ErrorMap;
+
+public:
+    /// \brief Constructs the exception from file-, read group-, and
+    ///        record-level error maps (const-reference overload).
+    ValidationException(const ErrorMap& fileErrors,
+                        const ErrorMap& readGroupErrors,
+                        const ErrorMap& recordErrors);
+
+    /// \brief Constructs the exception from file-, read group-, and
+    ///        record-level error maps (rvalue-reference overload).
+    ValidationException(ErrorMap&& fileErrors,
+                        ErrorMap&& readGroupErrors,
+                        ErrorMap&& recordErrors);
+
+    // This is a work around for the Intel PHI compiler (icpc): the destructor
+    // is declared explicitly as non-throwing. It is spelled 'noexcept', like
+    // what() below, because the equivalent 'throw()' form is deprecated.
+    ~ValidationException() noexcept
+    {
+
+    }
+public:
+    // Accessors for the three error maps. They are defined outside this
+    // header; presumably each returns the matching member below.
+    const ErrorMap& FileErrors(void) const;
+    const ErrorMap& ReadGroupErrors(void) const;
+    const ErrorMap& RecordErrors(void) const;
+
+    /// \returns the display message for this exception (defined outside this
+    ///          header)
+    const char* what(void) const noexcept override;
+
+private:
+    ErrorMap fileErrors_;
+    ErrorMap readGroupErrors_;
+    ErrorMap recordErrors_;
+    std::string msg_;
+
+private:
+    // Defined outside this header; presumably builds msg_ from the error maps.
+    void FormatMessage(void);
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // VALIDATIONEXCEPTION_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Accuracy.inl
+/// \brief Inline implementations for the Accuracy class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Accuracy.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// Constructs an Accuracy from \p accuracy. Values below Accuracy::MIN are
+/// stored as Accuracy::MIN and values above Accuracy::MAX as Accuracy::MAX;
+/// anything else is stored unchanged.
+inline Accuracy::Accuracy(float accuracy)
+{
+    if (accuracy < Accuracy::MIN) {
+        accuracy_ = Accuracy::MIN;
+    } else if (accuracy > Accuracy::MAX) {
+        accuracy_ = Accuracy::MAX;
+    } else {
+        accuracy_ = accuracy;
+    }
+}
+
+/// Copies the stored value of \p other.
+inline Accuracy::Accuracy(const Accuracy &other)
+    : accuracy_(other.accuracy_)
+{
+}
+
+/// Nothing to release.
+inline Accuracy::~Accuracy()
+{
+}
+
+/// \returns the stored accuracy value
+inline Accuracy::operator float() const
+{
+    return accuracy_;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamHeader.inl
+/// \brief Inline implementations for the BamHeader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamHeader.h"
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief Data holder behind BamHeader; every BamHeader member in this file
+///        reads or writes these fields through the d_ pointer.
+class BamHeaderPrivate
+{
+public:
+    // returned by BamHeader::Version(), PacBioBamVersion() and SortOrder()
+    std::string version_;
+    std::string pacbioBamVersion_;
+    std::string sortOrder_;
+    std::map<std::string, std::string> headerLineCustom_;
+
+    std::map<std::string, ReadGroupInfo> readGroups_;  // id => read group info
+    std::map<std::string, ProgramInfo> programs_;      // id => program info
+    std::vector<std::string> comments_;
+
+    // we need to preserve insertion order, use lookup for access by name
+    std::vector<SequenceInfo> sequences_;
+    std::map<std::string, int32_t> sequenceIdLookup_;
+};
+
+} // namespace internal
+
+/// Creates a header backed by a freshly allocated, empty BamHeaderPrivate.
+inline BamHeader::BamHeader()
+    : d_(new internal::BamHeaderPrivate)
+{
+}
+
+/// Copies the d_ pointer of \p other (the private data itself is not cloned
+/// here; see DeepCopy() for that).
+inline BamHeader::BamHeader(const BamHeader& other)
+    : d_(other.d_)
+{
+}
+
+/// Takes over the d_ pointer of \p other.
+inline BamHeader::BamHeader(BamHeader&& other)
+    : d_(std::move(other.d_))
+{
+}
+
+/// Copy-assigns the d_ pointer of \p other.
+inline BamHeader& BamHeader::operator=(const BamHeader& other)
+{
+    d_ = other.d_;
+    return *this;
+}
+
+/// Move-assigns the d_ pointer of \p other.
+inline BamHeader& BamHeader::operator=(BamHeader&& other)
+{
+    d_ = std::move(other.d_);
+    return *this;
+}
+
+inline BamHeader::~BamHeader()
+{
+}
+
+/// \returns the result of applying operator+= with \p other to a DeepCopy()
+///          of this header
+inline BamHeader BamHeader::operator+(const BamHeader& other) const
+{
+    return DeepCopy() += other;
+}
+
+/// Appends \p comment to the comment list.
+inline BamHeader& BamHeader::AddComment(const std::string& comment)
+{
+    d_->comments_.push_back(comment);
+    return *this;
+}
+
+/// Stores \p pg under its Id(), replacing any entry already stored there.
+inline BamHeader& BamHeader::AddProgram(const ProgramInfo& pg)
+{
+    d_->programs_[pg.Id()] = pg;
+    return *this;
+}
+
+/// Stores \p readGroup under its Id(), replacing any entry already stored
+/// there.
+inline BamHeader& BamHeader::AddReadGroup(const ReadGroupInfo& readGroup)
+{
+    d_->readGroups_[readGroup.Id()] = readGroup;
+    return *this;
+}
+
+/// Removes all comments.
+inline BamHeader& BamHeader::ClearComments()
+{
+    d_->comments_.clear();
+    return *this;
+}
+
+/// Removes all program entries.
+inline BamHeader& BamHeader::ClearPrograms()
+{
+    d_->programs_.clear();
+    return *this;
+}
+
+/// Removes all read group entries.
+inline BamHeader& BamHeader::ClearReadGroups()
+{
+    d_->readGroups_.clear();
+    return *this;
+}
+
+/// \returns a copy of the comment list
+inline std::vector<std::string> BamHeader::Comments() const
+{
+    return d_->comments_;
+}
+
+/// Replaces the comment list with \p comments.
+inline BamHeader& BamHeader::Comments(const std::vector<std::string>& comments)
+{
+    d_->comments_ = comments;
+    return *this;
+}
+
+/// \returns true if a program entry is stored under \p id
+inline bool BamHeader::HasProgram(const std::string& id) const
+{
+    return d_->programs_.count(id) != 0;
+}
+
+/// \returns true if a read group entry is stored under \p id
+inline bool BamHeader::HasReadGroup(const std::string& id) const
+{
+    return d_->readGroups_.count(id) != 0;
+}
+
+/// \returns true if \p name is present in the sequence name lookup
+inline bool BamHeader::HasSequence(const std::string& name) const
+{
+    return d_->sequenceIdLookup_.count(name) != 0;
+}
+
+/// \returns the number of stored sequences
+inline size_t BamHeader::NumSequences() const
+{
+    return d_->sequences_.size();
+}
+
+/// \returns a copy of the stored PacBio BAM version string
+inline std::string BamHeader::PacBioBamVersion() const
+{
+    return d_->pacbioBamVersion_;
+}
+
+/// \returns a copy of the sequence at index \p id; access goes through
+///          std::vector::at(), so an out-of-range index throws
+///          std::out_of_range
+inline SequenceInfo BamHeader::Sequence(const int32_t id) const
+{
+    return d_->sequences_.at(id);
+}
+
+/// \returns Length() of the sequence at index \p id
+inline std::string BamHeader::SequenceLength(const int32_t id) const
+{
+    return Sequence(id).Length();
+}
+
+/// \returns Name() of the sequence at index \p id
+inline std::string BamHeader::SequenceName(const int32_t id) const
+{
+    return Sequence(id).Name();
+}
+
+/// \returns a copy of the sequence list, in stored order
+inline std::vector<SequenceInfo> BamHeader::Sequences() const
+{
+    return d_->sequences_;
+}
+
+/// \returns a copy of the stored sort order string
+inline std::string BamHeader::SortOrder() const
+{
+    return d_->sortOrder_;
+}
+
+/// Sets the stored sort order string to \p order.
+inline BamHeader& BamHeader::SortOrder(const std::string& order)
+{
+    d_->sortOrder_ = order;
+    return *this;
+}
+
+/// \returns a copy of the stored version string
+inline std::string BamHeader::Version() const
+{
+    return d_->version_;
+}
+
+/// Sets the stored version string to \p version.
+inline BamHeader& BamHeader::Version(const std::string& version)
+{
+    d_->version_ = version;
+    return *this;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecord.inl
+/// \brief Inline implementations for the BamRecord class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamRecord.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// Static convenience form: \returns input.Clipped(clipType, start, end).
+inline BamRecord BamRecord::Clipped(const BamRecord& input,
+                                    const ClipType clipType,
+                                    const PacBio::BAM::Position start,
+                                    const PacBio::BAM::Position end)
+{
+    return input.Clipped(clipType, start, end);
+}
+
+/// \returns a copy of this record on which Clip(clipType, start, end) has
+///          been called; this record itself is left untouched
+inline BamRecord BamRecord::Clipped(const ClipType clipType,
+                                    const PacBio::BAM::Position start,
+                                    const PacBio::BAM::Position end) const
+{
+    BamRecord clippedCopy(*this);
+    clippedCopy.Clip(clipType, start, end);
+    return clippedCopy;
+}
+
+/// Static convenience form: \returns input.Mapped(...) with the same
+/// arguments.
+inline BamRecord BamRecord::Mapped(const BamRecord& input,
+                                   const int32_t referenceId,
+                                   const Position refStart,
+                                   const Strand strand,
+                                   const Cigar& cigar,
+                                   const uint8_t mappingQuality)
+{
+    return input.Mapped(referenceId, refStart, strand, cigar, mappingQuality);
+}
+
+/// \returns a copy of this record on which Map(...) has been called with the
+///          given arguments; this record itself is left untouched
+inline BamRecord BamRecord::Mapped(const int32_t referenceId,
+                                   const Position refStart,
+                                   const Strand strand,
+                                   const Cigar& cigar,
+                                   const uint8_t mappingQuality) const
+{
+    BamRecord mappedCopy(*this);
+    mappedCopy.Map(referenceId, refStart, strand, cigar, mappingQuality);
+    return mappedCopy;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecordBuilder.inl
+/// \brief Inline implementations for the BamRecordBuilder class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamRecordBuilder.h"
+
+namespace PacBio {
+namespace BAM {
+
+// Every setter below stores its argument in the builder and returns *this so
+// that calls can be chained.
+
+/// Sets core_.bin.
+inline BamRecordBuilder& BamRecordBuilder::Bin(const uint32_t bin)
+{
+    core_.bin = bin;
+    return *this;
+}
+
+/// Sets core_.flag.
+inline BamRecordBuilder& BamRecordBuilder::Flag(const uint32_t flag)
+{
+    core_.flag = flag;
+    return *this;
+}
+
+/// Sets core_.isize.
+inline BamRecordBuilder& BamRecordBuilder::InsertSize(const int32_t iSize)
+{
+    core_.isize = iSize;
+    return *this;
+}
+
+/// Sets core_.qual.
+inline BamRecordBuilder& BamRecordBuilder::MapQuality(const uint8_t mapQual)
+{
+    core_.qual = mapQual;
+    return *this;
+}
+
+/// Sets core_.mpos.
+inline BamRecordBuilder& BamRecordBuilder::MatePosition(const int32_t pos)
+{
+    core_.mpos = pos;
+    return *this;
+}
+
+/// Sets core_.mtid.
+inline BamRecordBuilder& BamRecordBuilder::MateReferenceId(const int32_t id)
+{
+    core_.mtid = id;
+    return *this;
+}
+
+/// Sets core_.pos.
+inline BamRecordBuilder& BamRecordBuilder::Position(const int32_t pos)
+{
+    core_.pos = pos;
+    return *this;
+}
+
+/// Copies \p qualities into the builder.
+inline BamRecordBuilder& BamRecordBuilder::Qualities(const std::string& qualities)
+{
+    qualities_ = qualities;
+    return *this;
+}
+
+/// Moves \p qualities into the builder.
+inline BamRecordBuilder& BamRecordBuilder::Qualities(std::string&& qualities)
+{
+    qualities_ = std::move(qualities);
+    return *this;
+}
+
+/// Sets core_.tid.
+inline BamRecordBuilder& BamRecordBuilder::ReferenceId(const int32_t id)
+{
+    core_.tid = id;
+    return *this;
+}
+
+/// Copies \p tags into the builder.
+inline BamRecordBuilder& BamRecordBuilder::Tags(const TagCollection& tags)
+{
+    tags_ = tags;
+    return *this;
+}
+
+/// Moves \p tags into the builder.
+inline BamRecordBuilder& BamRecordBuilder::Tags(TagCollection&& tags)
+{
+    tags_ = std::move(tags);
+    return *this;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecordImpl.inl
+/// \brief Inline implementations for the BamRecordImpl class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamRecordImpl.h"
+
+namespace PacBio {
+namespace BAM {
+
+// ---------------------------------------------------------------------------
+// Core field accessors: each getter returns a field of d_->core, each setter
+// assigns it and returns *this for chaining.
+// ---------------------------------------------------------------------------
+
+/// \returns d_->core.bin
+inline uint32_t BamRecordImpl::Bin() const
+{
+    return d_->core.bin;
+}
+
+/// Sets d_->core.bin.
+inline BamRecordImpl& BamRecordImpl::Bin(uint32_t bin)
+{
+    d_->core.bin = bin;
+    return *this;
+}
+
+/// \returns d_->core.flag
+inline uint32_t BamRecordImpl::Flag() const
+{
+    return d_->core.flag;
+}
+
+/// Sets d_->core.flag.
+inline BamRecordImpl& BamRecordImpl::Flag(uint32_t flag)
+{
+    d_->core.flag = flag;
+    return *this;
+}
+
+/// \returns d_->core.isize
+inline int32_t BamRecordImpl::InsertSize() const
+{
+    return d_->core.isize;
+}
+
+/// Sets d_->core.isize.
+inline BamRecordImpl& BamRecordImpl::InsertSize(int32_t iSize)
+{
+    d_->core.isize = iSize;
+    return *this;
+}
+
+/// \returns d_->core.qual
+inline uint8_t BamRecordImpl::MapQuality() const
+{
+    return d_->core.qual;
+}
+
+/// Sets d_->core.qual.
+inline BamRecordImpl& BamRecordImpl::MapQuality(uint8_t mapQual)
+{
+    d_->core.qual = mapQual;
+    return *this;
+}
+
+/// \returns d_->core.mpos
+inline PacBio::BAM::Position BamRecordImpl::MatePosition() const
+{
+    return d_->core.mpos;
+}
+
+/// Sets d_->core.mpos.
+inline BamRecordImpl& BamRecordImpl::MatePosition(PacBio::BAM::Position pos)
+{
+    d_->core.mpos = pos;
+    return *this;
+}
+
+/// \returns d_->core.mtid
+inline int32_t BamRecordImpl::MateReferenceId() const
+{
+    return d_->core.mtid;
+}
+
+/// Sets d_->core.mtid.
+inline BamRecordImpl& BamRecordImpl::MateReferenceId(int32_t id)
+{
+    d_->core.mtid = id;
+    return *this;
+}
+
+/// \returns d_->core.pos
+inline PacBio::BAM::Position BamRecordImpl::Position() const
+{
+    return d_->core.pos;
+}
+
+/// Sets d_->core.pos.
+inline BamRecordImpl& BamRecordImpl::Position(PacBio::BAM::Position pos)
+{
+    d_->core.pos = pos;
+    return *this;
+}
+
+/// \returns d_->core.tid
+inline int32_t BamRecordImpl::ReferenceId() const
+{
+    return d_->core.tid;
+}
+
+/// Sets d_->core.tid.
+inline BamRecordImpl& BamRecordImpl::ReferenceId(int32_t id)
+{
+    d_->core.tid = id;
+    return *this;
+}
+
+// ---------------------------------------------------------------------------
+// Flag accessors: each getter tests one bit of d_->core.flag, each setter
+// sets or clears that bit and returns *this for chaining. Note that the
+// "mapped", "mate mapped" and "primary alignment" properties are stored
+// inverted (UNMAPPED, MATE_UNMAPPED, SECONDARY).
+// ---------------------------------------------------------------------------
+
+/// \returns true if the DUPLICATE bit is set
+inline bool BamRecordImpl::IsDuplicate() const
+{
+    return (d_->core.flag & BamRecordImpl::DUPLICATE) != 0;
+}
+
+/// Sets the DUPLICATE bit when \p ok is true, clears it otherwise.
+inline BamRecordImpl& BamRecordImpl::SetDuplicate(bool ok)
+{
+    if (!ok)
+        d_->core.flag &= ~BamRecordImpl::DUPLICATE;
+    else
+        d_->core.flag |= BamRecordImpl::DUPLICATE;
+    return *this;
+}
+
+/// \returns true if the FAILED_QC bit is set
+inline bool BamRecordImpl::IsFailedQC() const
+{
+    return (d_->core.flag & BamRecordImpl::FAILED_QC) != 0;
+}
+
+/// Sets the FAILED_QC bit when \p ok is true, clears it otherwise.
+inline BamRecordImpl& BamRecordImpl::SetFailedQC(bool ok)
+{
+    if (!ok)
+        d_->core.flag &= ~BamRecordImpl::FAILED_QC;
+    else
+        d_->core.flag |= BamRecordImpl::FAILED_QC;
+    return *this;
+}
+
+/// \returns true if the MATE_1 bit is set
+inline bool BamRecordImpl::IsFirstMate() const
+{
+    return (d_->core.flag & BamRecordImpl::MATE_1) != 0;
+}
+
+/// Sets the MATE_1 bit when \p ok is true, clears it otherwise.
+inline BamRecordImpl& BamRecordImpl::SetFirstMate(bool ok)
+{
+    if (!ok)
+        d_->core.flag &= ~BamRecordImpl::MATE_1;
+    else
+        d_->core.flag |= BamRecordImpl::MATE_1;
+    return *this;
+}
+
+/// \returns true if the UNMAPPED bit is NOT set
+inline bool BamRecordImpl::IsMapped() const
+{
+    return (d_->core.flag & BamRecordImpl::UNMAPPED) == 0;
+}
+
+/// Clears the UNMAPPED bit when \p ok is true, sets it otherwise.
+inline BamRecordImpl& BamRecordImpl::SetMapped(bool ok)
+{
+    if (!ok)
+        d_->core.flag |= BamRecordImpl::UNMAPPED;
+    else
+        d_->core.flag &= ~BamRecordImpl::UNMAPPED;
+    return *this;
+}
+
+/// \returns true if the MATE_UNMAPPED bit is NOT set
+inline bool BamRecordImpl::IsMateMapped() const
+{
+    return (d_->core.flag & BamRecordImpl::MATE_UNMAPPED) == 0;
+}
+
+/// Clears the MATE_UNMAPPED bit when \p ok is true, sets it otherwise.
+inline BamRecordImpl& BamRecordImpl::SetMateMapped(bool ok)
+{
+    if (!ok)
+        d_->core.flag |= BamRecordImpl::MATE_UNMAPPED;
+    else
+        d_->core.flag &= ~BamRecordImpl::MATE_UNMAPPED;
+    return *this;
+}
+
+/// \returns true if the MATE_REVERSE_STRAND bit is set
+inline bool BamRecordImpl::IsMateReverseStrand() const
+{
+    return (d_->core.flag & BamRecordImpl::MATE_REVERSE_STRAND) != 0;
+}
+
+/// Sets the MATE_REVERSE_STRAND bit when \p ok is true, clears it otherwise.
+inline BamRecordImpl& BamRecordImpl::SetMateReverseStrand(bool ok)
+{
+    if (!ok)
+        d_->core.flag &= ~BamRecordImpl::MATE_REVERSE_STRAND;
+    else
+        d_->core.flag |= BamRecordImpl::MATE_REVERSE_STRAND;
+    return *this;
+}
+
+/// \returns true if the PAIRED bit is set
+inline bool BamRecordImpl::IsPaired() const
+{
+    return (d_->core.flag & BamRecordImpl::PAIRED) != 0;
+}
+
+/// Sets the PAIRED bit when \p ok is true, clears it otherwise.
+inline BamRecordImpl& BamRecordImpl::SetPaired(bool ok)
+{
+    if (!ok)
+        d_->core.flag &= ~BamRecordImpl::PAIRED;
+    else
+        d_->core.flag |= BamRecordImpl::PAIRED;
+    return *this;
+}
+
+/// \returns true if the SECONDARY bit is NOT set
+inline bool BamRecordImpl::IsPrimaryAlignment() const
+{
+    return (d_->core.flag & BamRecordImpl::SECONDARY) == 0;
+}
+
+/// Clears the SECONDARY bit when \p ok is true, sets it otherwise.
+inline BamRecordImpl& BamRecordImpl::SetPrimaryAlignment(bool ok)
+{
+    if (!ok)
+        d_->core.flag |= BamRecordImpl::SECONDARY;
+    else
+        d_->core.flag &= ~BamRecordImpl::SECONDARY;
+    return *this;
+}
+
+/// \returns true if the PROPER_PAIR bit is set
+inline bool BamRecordImpl::IsProperPair() const
+{
+    return (d_->core.flag & BamRecordImpl::PROPER_PAIR) != 0;
+}
+
+/// Sets the PROPER_PAIR bit when \p ok is true, clears it otherwise.
+inline BamRecordImpl& BamRecordImpl::SetProperPair(bool ok)
+{
+    if (!ok)
+        d_->core.flag &= ~BamRecordImpl::PROPER_PAIR;
+    else
+        d_->core.flag |= BamRecordImpl::PROPER_PAIR;
+    return *this;
+}
+
+/// \returns true if the REVERSE_STRAND bit is set
+inline bool BamRecordImpl::IsReverseStrand() const
+{
+    return (d_->core.flag & BamRecordImpl::REVERSE_STRAND) != 0;
+}
+
+/// Sets the REVERSE_STRAND bit when \p ok is true, clears it otherwise.
+inline BamRecordImpl& BamRecordImpl::SetReverseStrand(bool ok)
+{
+    if (!ok)
+        d_->core.flag &= ~BamRecordImpl::REVERSE_STRAND;
+    else
+        d_->core.flag |= BamRecordImpl::REVERSE_STRAND;
+    return *this;
+}
+
+/// \returns true if the MATE_2 bit is set
+inline bool BamRecordImpl::IsSecondMate() const
+{
+    return (d_->core.flag & BamRecordImpl::MATE_2) != 0;
+}
+
+/// Sets the MATE_2 bit when \p ok is true, clears it otherwise.
+inline BamRecordImpl& BamRecordImpl::SetSecondMate(bool ok)
+{
+    if (!ok)
+        d_->core.flag &= ~BamRecordImpl::MATE_2;
+    else
+        d_->core.flag |= BamRecordImpl::MATE_2;
+    return *this;
+}
+
+/// \returns true if the SUPPLEMENTARY bit is set
+inline bool BamRecordImpl::IsSupplementaryAlignment() const
+{
+    return (d_->core.flag & BamRecordImpl::SUPPLEMENTARY) != 0;
+}
+
+/// Sets the SUPPLEMENTARY bit when \p ok is true, clears it otherwise.
+inline BamRecordImpl& BamRecordImpl::SetSupplementaryAlignment(bool ok)
+{
+    if (!ok)
+        d_->core.flag &= ~BamRecordImpl::SUPPLEMENTARY;
+    else
+        d_->core.flag |= BamRecordImpl::SUPPLEMENTARY;
+    return *this;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecordView.inl
+/// \brief Inline implementations for the BamRecordView class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamRecordView.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// Creates a view over \p record and remembers the orientation, aligned,
+/// exciseSoftClips and pulseBehavior settings that the accessors below pass
+/// along on every call.
+inline BamRecordView::BamRecordView(const BamRecord& record,
+                                    const Orientation orientation,
+                                    const bool aligned,
+                                    const bool exciseSoftClips,
+                                    const PulseBehavior pulseBehavior)
+    : record_(record)
+    , orientation_(orientation)
+    , aligned_(aligned)
+    , exciseSoftClips_(exciseSoftClips)
+    , pulseBehavior_(pulseBehavior)
+{
+}
+
+// Each accessor below forwards to the named BamRecord method using the stored
+// settings. Accessors that also pass pulseBehavior_ are marked "(pulse)".
+
+/// Forwards to BamRecord::AltLabelQV (pulse).
+inline QualityValues BamRecordView::AltLabelQVs() const
+{
+    return record_.AltLabelQV(orientation_, aligned_, exciseSoftClips_, pulseBehavior_);
+}
+
+/// Forwards to BamRecord::AltLabelTag (pulse).
+inline std::string BamRecordView::AltLabelTags() const
+{
+    return record_.AltLabelTag(orientation_, aligned_, exciseSoftClips_, pulseBehavior_);
+}
+
+/// Forwards to BamRecord::DeletionQV.
+inline QualityValues BamRecordView::DeletionQVs() const
+{
+    return record_.DeletionQV(orientation_, aligned_, exciseSoftClips_);
+}
+
+/// Forwards to BamRecord::DeletionTag.
+inline std::string BamRecordView::DeletionTags() const
+{
+    return record_.DeletionTag(orientation_, aligned_, exciseSoftClips_);
+}
+
+/// Forwards to BamRecord::InsertionQV.
+inline QualityValues BamRecordView::InsertionQVs() const
+{
+    return record_.InsertionQV(orientation_, aligned_, exciseSoftClips_);
+}
+
+/// Forwards to BamRecord::IPD.
+inline Frames BamRecordView::IPD() const
+{
+    return record_.IPD(orientation_, aligned_, exciseSoftClips_);
+}
+
+/// Forwards to BamRecord::IPD, exactly like IPD() above.
+inline Frames BamRecordView::PrebaseFrames() const
+{
+    return record_.IPD(orientation_, aligned_, exciseSoftClips_);
+}
+
+/// Forwards to BamRecord::LabelQV (pulse).
+inline QualityValues BamRecordView::LabelQVs() const
+{
+    return record_.LabelQV(orientation_, aligned_, exciseSoftClips_, pulseBehavior_);
+}
+
+/// Forwards to BamRecord::MergeQV.
+inline QualityValues BamRecordView::MergeQVs() const
+{
+    return record_.MergeQV(orientation_, aligned_, exciseSoftClips_);
+}
+
+/// Forwards to BamRecord::PulseMergeQV (pulse).
+inline QualityValues BamRecordView::PulseMergeQVs() const
+{
+    return record_.PulseMergeQV(orientation_, aligned_, exciseSoftClips_, pulseBehavior_);
+}
+
+/// Forwards to BamRecord::Pkmean (pulse).
+inline std::vector<float> BamRecordView::Pkmean() const
+{
+    return record_.Pkmean(orientation_, aligned_, exciseSoftClips_, pulseBehavior_);
+}
+
+/// Forwards to BamRecord::Pkmid (pulse).
+inline std::vector<float> BamRecordView::Pkmid() const
+{
+    return record_.Pkmid(orientation_, aligned_, exciseSoftClips_, pulseBehavior_);
+}
+
+/// Forwards to BamRecord::Pkmean2 (pulse).
+inline std::vector<float> BamRecordView::Pkmean2() const
+{
+    return record_.Pkmean2(orientation_, aligned_, exciseSoftClips_, pulseBehavior_);
+}
+
+/// Forwards to BamRecord::Pkmid2 (pulse).
+inline std::vector<float> BamRecordView::Pkmid2() const
+{
+    return record_.Pkmid2(orientation_, aligned_, exciseSoftClips_, pulseBehavior_);
+}
+
+/// Forwards to BamRecord::PrePulseFrames (pulse).
+inline Frames BamRecordView::PrePulseFrames() const
+{
+    return record_.PrePulseFrames(orientation_, aligned_, exciseSoftClips_, pulseBehavior_);
+}
+
+/// Forwards to BamRecord::PulseCall (pulse).
+inline std::string BamRecordView::PulseCalls() const
+{
+    return record_.PulseCall(orientation_, aligned_, exciseSoftClips_, pulseBehavior_);
+}
+
+/// Forwards to BamRecord::PulseCallWidth (pulse).
+inline Frames BamRecordView::PulseCallWidth() const
+{
+    return record_.PulseCallWidth(orientation_, aligned_, exciseSoftClips_, pulseBehavior_);
+}
+
+/// Forwards to BamRecord::PulseWidth.
+inline Frames BamRecordView::PulseWidths() const
+{
+    return record_.PulseWidth(orientation_, aligned_, exciseSoftClips_);
+}
+
+/// Forwards to BamRecord::Qualities.
+inline QualityValues BamRecordView::Qualities() const
+{
+    return record_.Qualities(orientation_, aligned_, exciseSoftClips_);
+}
+
+/// Forwards to BamRecord::Sequence.
+inline std::string BamRecordView::Sequence() const
+{
+    return record_.Sequence(orientation_, aligned_, exciseSoftClips_);
+}
+
+/// Forwards to BamRecord::StartFrame (pulse).
+inline std::vector<uint32_t> BamRecordView::StartFrames() const
+{
+    return record_.StartFrame(orientation_, aligned_, exciseSoftClips_, pulseBehavior_);
+}
+
+/// Forwards to BamRecord::SubstitutionQV.
+inline QualityValues BamRecordView::SubstitutionQVs() const
+{
+    return record_.SubstitutionQV(orientation_, aligned_, exciseSoftClips_);
+}
+
+/// Forwards to BamRecord::SubstitutionTag.
+inline std::string BamRecordView::SubstitutionTags() const
+{
+    return record_.SubstitutionTag(orientation_, aligned_, exciseSoftClips_);
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Cigar.inl
+/// \brief Inline implementations for the Cigar class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Cigar.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// Creates a CIGAR with a default-constructed (empty) operation vector.
+inline Cigar::Cigar(void)
+    : std::vector<CigarOperation>()
+{
+}
+
+/// Copy-constructs the underlying operation vector from \p other.
+inline Cigar::Cigar(const Cigar& other)
+    : std::vector<CigarOperation>(other)
+{
+}
+
+/// Move-constructs the underlying operation vector from \p other.
+inline Cigar::Cigar(Cigar&& other)
+    : std::vector<CigarOperation>(std::move(other))
+{
+}
+
+/// Copy-assigns the underlying operation vector; returns *this for chaining.
+inline Cigar& Cigar::operator=(const Cigar& other)
+{
+    typedef std::vector<CigarOperation> Base;
+    static_cast<Base&>(*this) = other;
+    return *this;
+}
+
+/// Move-assigns the underlying operation vector; returns *this for chaining.
+inline Cigar& Cigar::operator=(Cigar&& other)
+{
+    typedef std::vector<CigarOperation> Base;
+    static_cast<Base&>(*this) = std::move(other);
+    return *this;
+}
+
+inline Cigar::~Cigar(void)
+{
+}
+
+/// Named-constructor wrapper around the Cigar(const std::string&) constructor.
+inline Cigar Cigar::FromStdString(const std::string& stdString)
+{
+    return Cigar(stdString);
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file CigarOperation.inl
+/// \brief Inline implementations for the CigarOperation class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/CigarOperation.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// Creates an operation of type UNKNOWN_OP with length 0.
+inline CigarOperation::CigarOperation(void)
+    : type_(CigarOperationType::UNKNOWN_OP)
+    , length_(0)
+{
+}
+
+/// Creates an operation from its CIGAR character and length.
+///
+/// The character is translated with CharToType() and the result is handed to
+/// the (type, length) constructor, which performs the 'M' check.
+///
+/// \throws std::runtime_error if the resulting type is ALIGNMENT_MATCH.
+inline CigarOperation::CigarOperation(char c, uint32_t length)
+    : CigarOperation(CigarOperation::CharToType(c), length)
+{
+}
+
+/// Creates an operation from an explicit type and length.
+///
+/// \throws std::runtime_error if \p op is ALIGNMENT_MATCH.
+inline CigarOperation::CigarOperation(CigarOperationType op, uint32_t length)
+    : type_(op)
+    , length_(length)
+{
+    const bool isPlainMatch = (type_ == CigarOperationType::ALIGNMENT_MATCH);
+    if (isPlainMatch)
+        throw std::runtime_error("CIGAR operation 'M' is not allowed in PacBio BAM files. Use 'X/=' instead.");
+}
+
+inline CigarOperation::CigarOperation(const CigarOperation& other)
+    : type_(other.type_)
+    , length_(other.length_)
+{
+}
+
+// Both members are scalar values, so "moving" them is a plain copy.
+inline CigarOperation::CigarOperation(CigarOperation&& other)
+    : type_(other.type_)
+    , length_(other.length_)
+{
+}
+
+inline CigarOperation::~CigarOperation(void)
+{
+}
+
+/// \returns the operation's length.
+inline uint32_t CigarOperation::Length(void) const
+{
+    return length_;
+}
+
+/// Sets the operation's length; returns *this for chaining.
+inline CigarOperation& CigarOperation::Length(const uint32_t length)
+{
+    length_ = length;
+    return *this;
+}
+
+/// \returns the operation's type.
+inline CigarOperationType CigarOperation::Type(void) const
+{
+    return type_;
+}
+
+/// Sets the operation's type; returns *this for chaining.
+inline CigarOperation &CigarOperation::Type(const CigarOperationType opType)
+{
+    type_ = opType;
+    return *this;
+}
+
+/// \returns the operation's type converted through TypeToChar().
+inline char CigarOperation::Char(void) const
+{
+    return CigarOperation::TypeToChar(type_);
+}
+
+/// Sets the operation's type from a character via CharToType();
+/// returns *this for chaining.
+inline CigarOperation &CigarOperation::Char(const char opChar)
+{
+    type_ = CigarOperation::CharToType(opChar);
+    return *this;
+}
+
+inline CigarOperation& CigarOperation::operator=(const CigarOperation& other)
+{
+    type_ = other.type_;
+    length_ = other.length_;
+    return *this;
+}
+
+// Scalar members: move-assignment is a plain copy of both fields.
+inline CigarOperation& CigarOperation::operator=(CigarOperation&& other)
+{
+    type_ = other.type_;
+    length_ = other.length_;
+    return *this;
+}
+
+/// Two operations are equal when both type and length match.
+inline bool CigarOperation::operator==(const CigarOperation& other) const
+{
+    if (type_ != other.type_)
+        return false;
+    return length_ == other.length_;
+}
+
+/// Negation of operator==.
+inline bool CigarOperation::operator!=(const CigarOperation& other) const
+{
+    return type_ != other.type_ || length_ != other.length_;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Compare.inl
+/// \brief Inline implementations for the Compare class & inner classes.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Compare.h"
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Primary template is declared only; the partial specialization below
+// handles pointers to const member functions.
+template <typename T, T> struct MemberFnProxy;
+
+/// Exposes a const member function (supplied as a non-type template
+/// argument) through a static call() that takes the object explicitly.
+template<typename Class, typename Result, typename... Params, Result (Class::*method)(Params...)const>
+struct MemberFnProxy<Result (Class::*)(Params...)const, method>
+{
+    static Result call(const Class& obj, Params&&... params)
+    {
+        return (obj.*method)(std::forward<Params>(params)...);
+    }
+};
+
+} // namespace internal
+
+/// Invokes the member function \c fn on both records and compares the two
+/// results with a default-constructed \c CompareType.
+template<typename ValueType,
+         typename Compare::MemberFunctionBaseHelper<ValueType>::MemberFnType fn,
+         typename CompareType>
+inline bool Compare::MemberFunctionBase<ValueType, fn, CompareType>::operator()(const BamRecord& lhs,
+                                                                                const BamRecord& rhs) const
+{
+    typedef typename Compare::MemberFunctionBaseHelper<ValueType>::MemberFnType FnPointer;
+    typedef internal::MemberFnProxy<FnPointer, fn> Accessor;
+
+    CompareType comparator;
+    return comparator(Accessor::call(lhs), Accessor::call(rhs));
+}
+
+/// Always reports "not less than", regardless of the records passed in.
+inline bool Compare::None::operator()(const BamRecord&, const BamRecord&) const
+{
+    return false;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.\r
+//\r
+// All rights reserved.\r
+//\r
+// Redistribution and use in source and binary forms, with or without\r
+// modification, are permitted (subject to the limitations in the\r
+// disclaimer below) provided that the following conditions are met:\r
+//\r
+// * Redistributions of source code must retain the above copyright\r
+// notice, this list of conditions and the following disclaimer.\r
+//\r
+// * Redistributions in binary form must reproduce the above\r
+// copyright notice, this list of conditions and the following\r
+// disclaimer in the documentation and/or other materials provided\r
+// with the distribution.\r
+//\r
+// * Neither the name of Pacific Biosciences nor the names of its\r
+// contributors may be used to endorse or promote products derived\r
+// from this software without specific prior written permission.\r
+//\r
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE\r
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC\r
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED\r
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\r
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\r
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS\r
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\r
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF\r
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\r
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\r
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT\r
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
+// SUCH DAMAGE.\r
+//\r
+// File Description\r
+/// \file CompositeBamReader.inl\r
+/// \brief Inline implementations for the composite BAM readers, for\r
+/// working with multiple input files.\r
+//\r
+// Author: Derek Barnett\r
+\r
+#include "pbbam/CompositeBamReader.h"\r
+#include <algorithm>\r
+#include <set>\r
+#include <sstream>\r
+#include <stdexcept>\r
+\r
+namespace PacBio {\r
+namespace BAM {\r
+namespace internal {\r
+\r
+// -----------------------------------\r
+// Merging helpers\r
+// -----------------------------------\r
+\r
+/// Takes ownership of \p rdr; the record member is left default-constructed.\r
+inline CompositeMergeItem::CompositeMergeItem(std::unique_ptr<BamReader>&& rdr)\r
+    : reader(std::move(rdr))\r
+{\r
+}\r
+\r
+/// Takes ownership of \p rdr and moves \p rec into the record member.\r
+inline CompositeMergeItem::CompositeMergeItem(std::unique_ptr<BamReader>&& rdr,\r
+                                              BamRecord&& rec)\r
+    : reader(std::move(rdr))\r
+    , record(std::move(rec))\r
+{\r
+}\r
+\r
+/// Moves both the reader and the record out of \p other.\r
+inline CompositeMergeItem::CompositeMergeItem(CompositeMergeItem&& other)\r
+    : reader(std::move(other.reader))\r
+    , record(std::move(other.record))\r
+{\r
+}\r
+\r
+/// Move-assigns reader first, then record; returns *this.\r
+inline CompositeMergeItem& CompositeMergeItem::operator=(CompositeMergeItem&& other)\r
+{\r
+    reader = std::move(other.reader);\r
+    record = std::move(other.record);\r
+    return *this;\r
+}\r
+\r
+inline CompositeMergeItem::~CompositeMergeItem(void)\r
+{\r
+}\r
+\r
+/// Orders two merge items by applying a default-constructed \c CompareType\r
+/// to the records they currently hold.\r
+template<typename CompareType>\r
+inline bool CompositeMergeItemSorter<CompareType>::operator()(const CompositeMergeItem& lhs,\r
+                                                              const CompositeMergeItem& rhs)\r
+{\r
+    return CompareType()(lhs.record, rhs.record);\r
+}\r
+\r
+} // namespace internal\r
+\r
+// -----------------------------------\r
+// GenomicIntervalCompositeBamReader\r
+// -----------------------------------\r
+\r
+/// Records the filename of every BAM file, then applies \p interval via\r
+/// Interval(), which sets up the per-file readers.\r
+inline GenomicIntervalCompositeBamReader::GenomicIntervalCompositeBamReader(const GenomicInterval& interval,\r
+                                                                            const std::vector<BamFile>& bamFiles)\r
+{\r
+    filenames_.reserve(bamFiles.size());\r
+    for (auto it = bamFiles.begin(); it != bamFiles.end(); ++it)\r
+        filenames_.push_back(it->Filename());\r
+    Interval(interval);\r
+}\r
+\r
+/// Rvalue overload; only the filenames are taken from \p bamFiles.\r
+inline GenomicIntervalCompositeBamReader::GenomicIntervalCompositeBamReader(const GenomicInterval& interval,\r
+                                                                            std::vector<BamFile>&& bamFiles)\r
+{\r
+    filenames_.reserve(bamFiles.size());\r
+    for (auto it = bamFiles.begin(); it != bamFiles.end(); ++it)\r
+        filenames_.push_back(it->Filename());\r
+    Interval(interval);\r
+}\r
+\r
+/// Delegates to the vector-based constructor using dataset.BamFiles().\r
+inline GenomicIntervalCompositeBamReader::GenomicIntervalCompositeBamReader(const GenomicInterval& interval,\r
+                                                                            const DataSet& dataset)\r
+    : GenomicIntervalCompositeBamReader(interval, dataset.BamFiles())\r
+{\r
+}\r
+\r
+/// Hands out the record at the front of the merge queue.\r
+///\r
+/// \param[out] record  receives the front item's record (via swap)\r
+/// \returns false if the merge queue was already empty, true otherwise\r
+inline bool GenomicIntervalCompositeBamReader::GetNext(BamRecord& record)\r
+{\r
+    if (mergeItems_.empty())\r
+        return false;\r
+\r
+    // lift reader & record out of the front slot before discarding the slot\r
+    auto& frontSlot = mergeItems_.front();\r
+    internal::CompositeMergeItem current{ std::move(frontSlot.reader), std::move(frontSlot.record) };\r
+    mergeItems_.pop_front();\r
+\r
+    // caller gets the front record; 'current.record' now holds the caller's old one\r
+    std::swap(record, current.record);\r
+\r
+    // reader exhausted: 'current' (and its reader) is destroyed on return\r
+    if (!current.reader->GetNext(current.record))\r
+        return true;\r
+\r
+    // reader produced another record: put it back and restore ordering\r
+    mergeItems_.push_front(std::move(current));\r
+    UpdateSort();\r
+    return true;\r
+}\r
+\r
+/// \returns a const reference to this reader's interval_ member.\r
+inline const GenomicInterval& GenomicIntervalCompositeBamReader::Interval(void) const\r
+{\r
+    return interval_;\r
+}\r
+\r
+/// \brief Points every underlying reader at a new genomic interval.\r
+///\r
+/// Readers currently in the merge queue are re-targeted at \p interval and\r
+/// kept only if they yield a record. Every remaining filename gets a fresh\r
+/// BaiIndexedBamReader, which is kept only if it yields a record.\r
+///\r
+/// \param[in] interval  new genomic interval\r
+/// \returns reference to this reader\r
+/// \throws std::runtime_error if any file that needs a new reader has no\r
+///         standard (BAI) index\r
+inline GenomicIntervalCompositeBamReader& GenomicIntervalCompositeBamReader::Interval(const GenomicInterval& interval)\r
+{\r
+    auto updatedMergeItems = std::deque<internal::CompositeMergeItem>{ };\r
+    auto filesToCreate = std::set<std::string>{ filenames_.cbegin(), filenames_.cend() };\r
+\r
+    // update existing readers\r
+    while (!mergeItems_.empty()) {\r
+\r
+        // non-destructive 'pop' of first item from queue\r
+        auto firstIter = mergeItems_.begin();\r
+        auto firstItem = internal::CompositeMergeItem{ std::move(firstIter->reader), std::move(firstIter->record) };\r
+        mergeItems_.pop_front();\r
+\r
+        // reset interval\r
+        BaiIndexedBamReader* baiReader = dynamic_cast<BaiIndexedBamReader*>(firstItem.reader.get());\r
+        assert(baiReader);\r
+        baiReader->Interval(interval);\r
+\r
+        // try fetch 'next' from first item's reader\r
+        // if successful, keep the item (no new reader needed for its file)\r
+        // otherwise, this item will go out of scope & reader destroyed\r
+        if (firstItem.reader->GetNext(firstItem.record)) {\r
+            // read the filename BEFORE moving the item: the move leaves\r
+            // firstItem.reader null, so dereferencing it afterwards is invalid\r
+            filesToCreate.erase(firstItem.reader->Filename());\r
+            updatedMergeItems.push_front(std::move(firstItem));\r
+        }\r
+    }\r
+\r
+    // create readers for files that were not 'active' for the previous interval\r
+    std::vector<std::string> missingBai;\r
+    for (auto&& fn : filesToCreate) {\r
+        auto bamFile = BamFile{ fn };\r
+        if (bamFile.StandardIndexExists()) {\r
+            auto item = internal::CompositeMergeItem{ std::unique_ptr<BamReader>{ new BaiIndexedBamReader{ interval, std::move(bamFile) } } };\r
+            if (item.reader->GetNext(item.record))\r
+                updatedMergeItems.push_back(std::move(item));\r
+            // else not an error, simply no data matching interval\r
+        }\r
+        else {\r
+            // maybe handle PBI-backed interval searches if BAI missing, but for now treat as error\r
+            missingBai.push_back(bamFile.Filename());\r
+        }\r
+    }\r
+\r
+    // throw if any files missing BAI\r
+    if (!missingBai.empty()) {\r
+        std::stringstream e;\r
+        e << "failed to open GenomicIntervalCompositeBamReader because the following files are missing a BAI file:" << std::endl;\r
+        for (const auto& fn : missingBai)\r
+            e << "  " << fn << std::endl;\r
+        throw std::runtime_error(e.str());\r
+    }\r
+\r
+    // remember the interval so that Interval() const reports the active one,\r
+    // then update our actual container and return\r
+    interval_ = interval;\r
+    mergeItems_ = std::move(updatedMergeItems);\r
+    UpdateSort();\r
+    return *this;\r
+}\r
+\r
+/// Position-based comparisons between two BAM records, using\r
+/// ReferenceId() first and ReferenceStart() second.\r
+struct OrderByPosition\r
+{\r
+    /// \returns true if \p lhs orders before \p rhs.\r
+    ///\r
+    /// A reference ID of -1 orders after every other ID: a left-hand -1 is\r
+    /// never "less", and a right-hand -1 makes any other left-hand ID "less".\r
+    static inline bool less_than(const BamRecord& lhs, const BamRecord& rhs)\r
+    {\r
+        const int32_t leftRef  = lhs.ReferenceId();\r
+        const int32_t rightRef = rhs.ReferenceId();\r
+\r
+        if (leftRef == -1)\r
+            return false;\r
+        if (rightRef == -1)\r
+            return true;\r
+\r
+        // different references: order by reference ID\r
+        if (leftRef != rightRef)\r
+            return leftRef < rightRef;\r
+\r
+        // same reference: order by start position\r
+        return lhs.ReferenceStart() < rhs.ReferenceStart();\r
+    }\r
+\r
+    /// \returns true if both records share reference ID and reference start.\r
+    static inline bool equals(const BamRecord& lhs, const BamRecord& rhs)\r
+    {\r
+        if (lhs.ReferenceId() != rhs.ReferenceId())\r
+            return false;\r
+        return lhs.ReferenceStart() == rhs.ReferenceStart();\r
+    }\r
+};\r
+\r
+/// \brief Comparison functor that orders merge items by the position of the\r
+///        record each one currently holds (via OrderByPosition::less_than).\r
+///\r
+/// The argument/result typedefs are declared explicitly instead of being\r
+/// inherited from std::binary_function, which is deprecated since C++11 and\r
+/// removed in C++17.\r
+struct PositionSorter\r
+{\r
+    typedef internal::CompositeMergeItem first_argument_type;\r
+    typedef internal::CompositeMergeItem second_argument_type;\r
+    typedef bool result_type;\r
+\r
+    /// \returns true if \p lhs's record orders before \p rhs's record.\r
+    bool operator()(const internal::CompositeMergeItem& lhs,\r
+                    const internal::CompositeMergeItem& rhs) const\r
+    {\r
+        return OrderByPosition::less_than(lhs.record, rhs.record);\r
+    }\r
+};\r
+\r
+/// Re-sorts the merge queue with std::sort and a PositionSorter.\r
+inline void GenomicIntervalCompositeBamReader::UpdateSort(void)\r
+{\r
+    std::sort(mergeItems_.begin(), mergeItems_.end(), PositionSorter{ });\r
+}\r
+\r
+// ------------------------------\r
+// PbiFilterCompositeBamReader\r
+// ------------------------------\r
+\r
+/// Records the filename of every BAM file, then applies \p filter via\r
+/// Filter(), which sets up the per-file readers.\r
+template<typename OrderByType>\r
+inline PbiFilterCompositeBamReader<OrderByType>::PbiFilterCompositeBamReader(const PbiFilter& filter,\r
+                                                                             const std::vector<BamFile>& bamFiles)\r
+{\r
+    filenames_.reserve(bamFiles.size());\r
+    for (auto it = bamFiles.begin(); it != bamFiles.end(); ++it)\r
+        filenames_.push_back(it->Filename());\r
+    Filter(filter);\r
+}\r
+\r
+/// Rvalue overload; only the filenames are taken from \p bamFiles.\r
+template<typename OrderByType>\r
+inline PbiFilterCompositeBamReader<OrderByType>::PbiFilterCompositeBamReader(const PbiFilter& filter,\r
+                                                                             std::vector<BamFile>&& bamFiles)\r
+{\r
+    filenames_.reserve(bamFiles.size());\r
+    for (auto it = bamFiles.begin(); it != bamFiles.end(); ++it)\r
+        filenames_.push_back(it->Filename());\r
+    Filter(filter);\r
+}\r
+\r
+/// Delegates to the vector-based constructor using dataset.BamFiles().\r
+template<typename OrderByType>\r
+inline PbiFilterCompositeBamReader<OrderByType>::PbiFilterCompositeBamReader(const PbiFilter& filter,\r
+                                                                             const DataSet& dataset)\r
+    : PbiFilterCompositeBamReader(filter, dataset.BamFiles())\r
+{\r
+}\r
+\r
+/// Hands out the record at the front of the merge queue.\r
+///\r
+/// \param[out] record  receives the front item's record (via swap)\r
+/// \returns false if the merge queue was already empty, true otherwise\r
+template<typename OrderByType>\r
+inline bool PbiFilterCompositeBamReader<OrderByType>::GetNext(BamRecord& record)\r
+{\r
+    if (mergeQueue_.empty())\r
+        return false;\r
+\r
+    // lift reader & record out of the front slot before discarding the slot\r
+    auto& frontSlot = mergeQueue_.front();\r
+    value_type current{ std::move(frontSlot.reader), std::move(frontSlot.record) };\r
+    mergeQueue_.pop_front();\r
+\r
+    // caller gets the front record; 'current.record' now holds the caller's old one\r
+    std::swap(record, current.record);\r
+\r
+    // reader exhausted: 'current' (and its reader) is destroyed on return\r
+    if (!current.reader->GetNext(current.record))\r
+        return true;\r
+\r
+    // reader produced another record: put it back and restore ordering\r
+    mergeQueue_.push_front(std::move(current));\r
+    UpdateSort();\r
+    return true;\r
+}\r
+\r
+/// \brief Applies a new PBI filter to every underlying reader.\r
+///\r
+/// Readers currently in the merge queue are given \p filter and kept only if\r
+/// they yield a record. Every remaining filename gets a fresh\r
+/// PbiIndexedBamReader, which is kept only if it yields a record.\r
+///\r
+/// \param[in] filter  new PBI filter\r
+/// \returns reference to this reader\r
+/// \throws std::runtime_error if any file that needs a new reader has no\r
+///         PacBio (PBI) index\r
+template<typename OrderByType>\r
+inline PbiFilterCompositeBamReader<OrderByType>&\r
+PbiFilterCompositeBamReader<OrderByType>::Filter(const PbiFilter& filter)\r
+{\r
+    auto updatedMergeItems = container_type{ };\r
+    auto filesToCreate = std::set<std::string>{ filenames_.cbegin(), filenames_.cend() };\r
+\r
+    // update existing readers\r
+    while (!mergeQueue_.empty()) {\r
+\r
+        // non-destructive 'pop' of first item from queue\r
+        auto firstIter = mergeQueue_.begin();\r
+        auto firstItem = internal::CompositeMergeItem{ std::move(firstIter->reader), std::move(firstIter->record) };\r
+        mergeQueue_.pop_front();\r
+\r
+        // reset request\r
+        PbiIndexedBamReader* pbiReader = dynamic_cast<PbiIndexedBamReader*>(firstItem.reader.get());\r
+        assert(pbiReader);\r
+        pbiReader->Filter(filter);\r
+\r
+        // try fetch 'next' from first item's reader\r
+        // if successful, keep the item (no new reader needed for its file)\r
+        // otherwise, this item will go out of scope & reader destroyed\r
+        if (firstItem.reader->GetNext(firstItem.record)) {\r
+            // read the filename BEFORE moving the item: the move leaves\r
+            // firstItem.reader null, so dereferencing it afterwards is invalid\r
+            filesToCreate.erase(firstItem.reader->Filename());\r
+            updatedMergeItems.push_front(std::move(firstItem));\r
+        }\r
+    }\r
+\r
+    // create readers for files that were not 'active' for the previous filter\r
+    std::vector<std::string> missingPbi;\r
+    for (auto&& fn : filesToCreate) {\r
+        auto bamFile = BamFile{ fn };\r
+        if (bamFile.PacBioIndexExists()) {\r
+            auto item = internal::CompositeMergeItem{ std::unique_ptr<BamReader>{ new PbiIndexedBamReader{ filter, std::move(bamFile) } } };\r
+            if (item.reader->GetNext(item.record))\r
+                updatedMergeItems.push_back(std::move(item));\r
+            // else not an error, simply no data matching filter\r
+        }\r
+        else\r
+            missingPbi.push_back(fn);\r
+    }\r
+\r
+    // throw if any files missing PBI\r
+    if (!missingPbi.empty()) {\r
+        std::stringstream e;\r
+        e << "failed to open PbiFilterCompositeBamReader because the following files are missing a PBI file:" << std::endl;\r
+        for (const auto& fn : missingPbi)\r
+            e << "  " << fn << std::endl;\r
+        throw std::runtime_error(e.str());\r
+    }\r
+\r
+    // update our actual container and return\r
+    mergeQueue_ = std::move(updatedMergeItems);\r
+    UpdateSort();\r
+    return *this;\r
+}\r
+\r
+/// Re-sorts the merge queue with std::stable_sort and a default-constructed\r
+/// merge_sorter_type.\r
+template<typename OrderByType>\r
+inline void PbiFilterCompositeBamReader<OrderByType>::UpdateSort(void)\r
+{\r
+    std::stable_sort(mergeQueue_.begin(), mergeQueue_.end(), merge_sorter_type{});\r
+}\r
+\r
+// ------------------------------\r
+// SequentialCompositeBamReader\r
+// ------------------------------\r
+\r
+/// Creates one BamReader per file, in the order given.\r
+///\r
+/// Each reader is wrapped in a std::unique_ptr before it is handed to the\r
+/// container, so it is released if the container insertion throws.\r
+inline SequentialCompositeBamReader::SequentialCompositeBamReader(const std::vector<BamFile>& bamFiles)\r
+{\r
+    for (const auto& bamFile : bamFiles)\r
+        readers_.emplace_back(std::unique_ptr<BamReader>{ new BamReader{ bamFile } });\r
+}\r
+\r
+/// Rvalue overload: each BamFile is moved into its reader.\r
+///\r
+/// Each reader is wrapped in a std::unique_ptr before it is handed to the\r
+/// container, so it is released if the container insertion throws.\r
+inline SequentialCompositeBamReader::SequentialCompositeBamReader(std::vector<BamFile>&& bamFiles)\r
+{\r
+    for (auto&& bamFile : bamFiles)\r
+        readers_.emplace_back(std::unique_ptr<BamReader>{ new BamReader{ std::move(bamFile) } });\r
+}\r
+\r
+/// Delegates to the vector-based constructor using dataset.BamFiles().\r
+inline SequentialCompositeBamReader::SequentialCompositeBamReader(const DataSet& dataset)\r
+    : SequentialCompositeBamReader(dataset.BamFiles())\r
+{ }\r
+\r
+/// Reads the next record from the front reader, discarding readers that\r
+/// have nothing left to give.\r
+///\r
+/// \param[out] record  passed to the front reader's GetNext()\r
+/// \returns true as soon as a reader yields a record; false once no readers remain\r
+inline bool SequentialCompositeBamReader::GetNext(BamRecord& record)\r
+{\r
+    while (!readers_.empty()) {\r
+        if (readers_.front()->GetNext(record))\r
+            return true;\r
+\r
+        // front reader is exhausted: drop it and fall through to the next one\r
+        readers_.pop_front();\r
+    }\r
+\r
+    // every reader has been consumed\r
+    return false;\r
+}\r
+\r
+} // namespace BAM\r
+} // namespace PacBio\r
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file DataSet.inl
+/// \brief Inline implementations for the DataSet class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/DataSet.h"
+
+namespace PacBio {
+namespace BAM {
+
+// Every accessor below forwards to the implementation object d_.
+// Most properties come as a triple:
+//   - const getter    -> const reference from d_
+//   - mutable getter  -> non-const reference from d_
+//   - setter          -> forwards the value to d_ and returns *this
+
+// ----- Attribute (looked up by name) -----
+
+inline const std::string& DataSet::Attribute(const std::string& name) const
+{
+    return d_->Attribute(name);
+}
+
+inline std::string& DataSet::Attribute(const std::string& name)
+{
+    return d_->Attribute(name);
+}
+
+inline DataSet& DataSet::Attribute(const std::string& name, const std::string& value)
+{
+    d_->Attribute(name, value);
+    return *this;
+}
+
+// ----- CreatedAt -----
+
+inline const std::string& DataSet::CreatedAt(void) const
+{
+    return d_->CreatedAt();
+}
+
+inline std::string& DataSet::CreatedAt(void)
+{
+    return d_->CreatedAt();
+}
+
+inline DataSet& DataSet::CreatedAt(const std::string& createdAt)
+{
+    d_->CreatedAt(createdAt);
+    return *this;
+}
+
+// ----- Extensions -----
+
+inline const PacBio::BAM::Extensions& DataSet::Extensions(void) const
+{
+    return d_->Extensions();
+}
+
+inline PacBio::BAM::Extensions& DataSet::Extensions(void)
+{
+    return d_->Extensions();
+}
+
+inline DataSet& DataSet::Extensions(const PacBio::BAM::Extensions& extensions)
+{
+    d_->Extensions(extensions);
+    return *this;
+}
+
+// ----- ExternalResources -----
+
+inline const PacBio::BAM::ExternalResources& DataSet::ExternalResources(void) const
+{
+    return d_->ExternalResources();
+}
+
+inline PacBio::BAM::ExternalResources& DataSet::ExternalResources(void)
+{
+    return d_->ExternalResources();
+}
+
+inline DataSet& DataSet::ExternalResources(const PacBio::BAM::ExternalResources& resources)
+{
+    d_->ExternalResources(resources);
+    return *this;
+}
+
+// ----- Filters -----
+
+inline const PacBio::BAM::Filters& DataSet::Filters(void) const
+{
+    return d_->Filters();
+}
+
+inline PacBio::BAM::Filters& DataSet::Filters(void)
+{
+    return d_->Filters();
+}
+
+inline DataSet& DataSet::Filters(const PacBio::BAM::Filters& filters)
+{
+    d_->Filters(filters);
+    return *this;
+}
+
+// ----- Format -----
+
+inline const std::string& DataSet::Format(void) const
+{
+    return d_->Format();
+}
+
+inline std::string& DataSet::Format(void)
+{
+    return d_->Format();
+}
+
+inline DataSet& DataSet::Format(const std::string& format)
+{
+    d_->Format(format);
+    return *this;
+}
+
+// ----- Metadata -----
+
+inline const PacBio::BAM::DataSetMetadata& DataSet::Metadata(void) const
+{
+    return d_->Metadata();
+}
+
+inline PacBio::BAM::DataSetMetadata& DataSet::Metadata(void)
+{
+    return d_->Metadata();
+}
+
+inline DataSet& DataSet::Metadata(const PacBio::BAM::DataSetMetadata& metadata)
+{
+    d_->Metadata(metadata);
+    return *this;
+}
+
+// ----- MetaType -----
+
+inline const std::string& DataSet::MetaType(void) const
+{
+    return d_->MetaType();
+}
+
+inline std::string& DataSet::MetaType(void)
+{
+    return d_->MetaType();
+}
+
+inline DataSet& DataSet::MetaType(const std::string& metatype)
+{
+    d_->MetaType(metatype);
+    return *this;
+}
+
+// ----- ModifiedAt -----
+
+inline const std::string& DataSet::ModifiedAt(void) const
+{
+    return d_->ModifiedAt();
+}
+
+inline std::string& DataSet::ModifiedAt(void)
+{
+    return d_->ModifiedAt();
+}
+
+inline DataSet& DataSet::ModifiedAt(const std::string& modifiedAt)
+{
+    d_->ModifiedAt(modifiedAt);
+    return *this;
+}
+
+// ----- Name -----
+
+inline const std::string& DataSet::Name(void) const
+{
+    return d_->Name();
+}
+
+inline std::string& DataSet::Name(void)
+{
+    return d_->Name();
+}
+
+inline DataSet& DataSet::Name(const std::string& name)
+{
+    d_->Name(name);
+    return *this;
+}
+
+// ----- ResourceId -----
+
+inline const std::string& DataSet::ResourceId(void) const
+{
+    return d_->ResourceId();
+}
+
+inline std::string& DataSet::ResourceId(void)
+{
+    return d_->ResourceId();
+}
+
+inline DataSet& DataSet::ResourceId(const std::string& resourceId)
+{
+    d_->ResourceId(resourceId);
+    return *this;
+}
+
+// ----- SubDataSets -----
+
+inline const PacBio::BAM::SubDataSets& DataSet::SubDataSets(void) const
+{
+    return d_->SubDataSets();
+}
+
+inline PacBio::BAM::SubDataSets& DataSet::SubDataSets(void)
+{
+    return d_->SubDataSets();
+}
+
+inline DataSet& DataSet::SubDataSets(const PacBio::BAM::SubDataSets& subdatasets)
+{
+    d_->SubDataSets(subdatasets);
+    return *this;
+}
+
+// ----- Tags -----
+
+inline const std::string& DataSet::Tags(void) const
+{
+    return d_->Tags();
+}
+
+inline std::string& DataSet::Tags(void)
+{
+    return d_->Tags();
+}
+
+inline DataSet& DataSet::Tags(const std::string& tags)
+{
+    d_->Tags(tags);
+    return *this;
+}
+
+// ----- TimeStampedName -----
+
+inline const std::string& DataSet::TimeStampedName(void) const
+{
+    return d_->TimeStampedName();
+}
+
+inline std::string& DataSet::TimeStampedName(void)
+{
+    return d_->TimeStampedName();
+}
+
+inline DataSet& DataSet::TimeStampedName(const std::string& timeStampedName)
+{
+    d_->TimeStampedName(timeStampedName);
+    return *this;
+}
+
+// ----- Type / TypeName -----
+
+/// \returns the enum value obtained by passing TypeName() to NameToType().
+inline PacBio::BAM::DataSet::TypeEnum DataSet::Type(void) const
+{
+    return DataSet::NameToType(TypeName());
+}
+
+/// Sets the label of d_ to the name TypeToName() gives for \p type;
+/// returns *this.
+inline DataSet& DataSet::Type(const DataSet::TypeEnum type)
+{
+    d_->Label(DataSet::TypeToName(type));
+    return *this;
+}
+
+/// \returns d_'s local-name label converted to a std::string.
+inline std::string DataSet::TypeName(void) const
+{
+    return d_->LocalNameLabel().to_string();
+}
+
+// ----- UniqueId -----
+
+inline const std::string& DataSet::UniqueId(void) const
+{
+    return d_->UniqueId();
+}
+
+inline std::string& DataSet::UniqueId(void)
+{
+    return d_->UniqueId();
+}
+
+inline DataSet& DataSet::UniqueId(const std::string& uuid)
+{
+    d_->UniqueId(uuid);
+    return *this;
+}
+
+// ----- Version -----
+
+inline const std::string& DataSet::Version(void) const
+{
+    return d_->Version();
+}
+
+inline std::string& DataSet::Version(void)
+{
+    return d_->Version();
+}
+
+inline DataSet& DataSet::Version(const std::string& version)
+{
+    d_->Version(version);
+    return *this;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef DATASETBASETYPES_H
+#define DATASETBASETYPES_H
+
+#include "pbbam/Config.h"
+#include "pbbam/internal/DataSetElement.h"
+#include "pbbam/internal/DataSetListElement.h"
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+class DataSetMetadata;
+class Extensions;
+class ExternalResources;
+class FileIndices;
+class Filters;
+class Properties;
+class Provenance;
+
+namespace internal {
+
+// Element base class exposing the entity fields CreatedAt, Description,
+// Extensions, Format, ModifiedAt, Name, ResourceId, Tags and Version.
+// Every field comes as an overload set: const getter, mutable getter, and a
+// setter returning BaseEntityType& so that calls can be chained.
+class BaseEntityType : public DataSetElement
+{
+protected:
+    // Constructible by derived types only; xsd defaults to BASE_DATA_MODEL.
+    BaseEntityType(const std::string& label,
+                   const XsdType& xsd = XsdType::BASE_DATA_MODEL);
+
+public:
+    // CreatedAt
+    const std::string& CreatedAt(void) const;
+    std::string& CreatedAt(void);
+    BaseEntityType& CreatedAt(const std::string& createdAt);
+
+    // Description
+    const std::string& Description(void) const;
+    std::string& Description(void);
+    BaseEntityType& Description(const std::string& description);
+
+    // Extensions
+    const PacBio::BAM::Extensions& Extensions(void) const;
+    PacBio::BAM::Extensions& Extensions(void);
+    BaseEntityType& Extensions(const PacBio::BAM::Extensions& extensions);
+
+    // Format
+    const std::string& Format(void) const;
+    std::string& Format(void);
+    BaseEntityType& Format(const std::string& format);
+
+    // ModifiedAt
+    const std::string& ModifiedAt(void) const;
+    std::string& ModifiedAt(void);
+    BaseEntityType& ModifiedAt(const std::string& modifiedAt);
+
+    // Name
+    const std::string& Name(void) const;
+    std::string& Name(void);
+    BaseEntityType& Name(const std::string& name);
+
+    // ResourceId
+    const std::string& ResourceId(void) const;
+    std::string& ResourceId(void);
+    BaseEntityType& ResourceId(const std::string& resourceId);
+
+    // Tags
+    const std::string& Tags(void) const;
+    std::string& Tags(void);
+    BaseEntityType& Tags(const std::string& tags);
+
+    // Version
+    const std::string& Version(void) const;
+    std::string& Version(void);
+    BaseEntityType& Version(const std::string& version);
+};
+
+// Extends BaseEntityType with the fields Checksum, EncodedValue, MetaType,
+// SimpleValue, TimeStampedName, UniqueId and ValueDataType. Every field comes
+// as an overload set: const getter, mutable getter, and a setter returning
+// DataEntityType& so that calls can be chained.
+class DataEntityType : public BaseEntityType
+{
+protected:
+    // Constructible by derived types only; xsd defaults to BASE_DATA_MODEL.
+    DataEntityType(const std::string& label,
+                   const XsdType& xsd = XsdType::BASE_DATA_MODEL);
+
+public:
+    // Checksum
+    const std::string& Checksum(void) const;
+    std::string& Checksum(void);
+    DataEntityType& Checksum(const std::string& checksum);
+
+    // EncodedValue
+    const std::string& EncodedValue(void) const;
+    std::string& EncodedValue(void);
+    DataEntityType& EncodedValue(const std::string& encodedValue);
+
+    // MetaType
+    const std::string& MetaType(void) const;
+    std::string& MetaType(void);
+    DataEntityType& MetaType(const std::string& metatype);
+
+    // SimpleValue
+    const std::string& SimpleValue(void) const;
+    std::string& SimpleValue(void);
+    DataEntityType& SimpleValue(const std::string& simpleValue);
+
+    // TimeStampedName
+    const std::string& TimeStampedName(void) const;
+    std::string& TimeStampedName(void);
+    DataEntityType& TimeStampedName(const std::string& timeStampedName);
+
+    // UniqueId
+    const std::string& UniqueId(void) const;
+    std::string& UniqueId(void);
+    DataEntityType& UniqueId(const std::string& uuid);
+
+    // ValueDataType
+    const std::string& ValueDataType(void) const;
+    std::string& ValueDataType(void);
+    DataEntityType& ValueDataType(const std::string& valueDataType);
+};
+
+// Extends BaseEntityType with the fields MetaType, TimeStampedName and
+// UniqueId. Every field comes as an overload set: const getter, mutable
+// getter, and a setter returning StrictEntityType& so calls can be chained.
+class StrictEntityType : public BaseEntityType
+{
+protected:
+    // Constructible by derived types only; takes a metatype in addition to
+    // the element label. xsd defaults to BASE_DATA_MODEL.
+    StrictEntityType(const std::string& metatype,
+                     const std::string& label,
+                     const XsdType& xsd = XsdType::BASE_DATA_MODEL);
+
+public:
+    // MetaType
+    const std::string& MetaType(void) const;
+    std::string& MetaType(void);
+    StrictEntityType& MetaType(const std::string& metatype);
+
+    // TimeStampedName
+    const std::string& TimeStampedName(void) const;
+    std::string& TimeStampedName(void);
+    StrictEntityType& TimeStampedName(const std::string& timeStampedName);
+
+    // UniqueId
+    const std::string& UniqueId(void) const;
+    std::string& UniqueId(void);
+    StrictEntityType& UniqueId(const std::string& uuid);
+};
+
+// StrictEntityType whose constructor additionally takes a filename.
+// Declares no accessors of its own.
+class InputOutputDataType : public StrictEntityType
+{
+protected:
+    // Constructible by derived types only; xsd defaults to BASE_DATA_MODEL.
+    InputOutputDataType(const std::string& metatype,
+                        const std::string& filename,
+                        const std::string& label,
+                        const XsdType& xsd = XsdType::BASE_DATA_MODEL);
+};
+
+// InputOutputDataType that additionally exposes a FileIndices field.
+class IndexedDataType : public InputOutputDataType
+{
+protected:
+    // Constructible by derived types only; xsd defaults to BASE_DATA_MODEL.
+    IndexedDataType(const std::string& metatype,
+                    const std::string& filename,
+                    const std::string& label,
+                    const XsdType& xsd = XsdType::BASE_DATA_MODEL);
+
+public:
+    // FileIndices: const getter, mutable getter, and a setter returning
+    // IndexedDataType& so that calls can be chained.
+    const PacBio::BAM::FileIndices& FileIndices(void) const;
+    PacBio::BAM::FileIndices& FileIndices(void);
+    IndexedDataType& FileIndices(const PacBio::BAM::FileIndices& indices);
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/DataSetBaseTypes.inl"
+
+#endif // DATASETBASETYPES_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "pbbam/internal/DataSetBaseTypes.h"
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// ----------------
+// BaseEntityType
+// ----------------
+
+// Read-only "CreatedAt" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& BaseEntityType::CreatedAt(void) const
+{
+    return Attribute("CreatedAt");
+}
+
+// Mutable "CreatedAt" attribute; an empty entry is created if none exists.
+inline std::string& BaseEntityType::CreatedAt(void)
+{
+    return Attribute("CreatedAt");
+}
+
+// Sets the "CreatedAt" attribute; returns *this so calls can be chained.
+inline BaseEntityType& BaseEntityType::CreatedAt(const std::string& createdAt)
+{
+    Attribute("CreatedAt", createdAt);
+    return *this;
+}
+
+// Read-only "Description" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& BaseEntityType::Description(void) const
+{
+    return Attribute("Description");
+}
+
+// Mutable "Description" attribute; an empty entry is created if none exists.
+inline std::string& BaseEntityType::Description(void)
+{
+    return Attribute("Description");
+}
+
+// Sets the "Description" attribute; returns *this so calls can be chained.
+inline BaseEntityType& BaseEntityType::Description(const std::string& description)
+{
+    Attribute("Description", description);
+    return *this;
+}
+
+// Read-only "Format" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& BaseEntityType::Format(void) const
+{
+    return Attribute("Format");
+}
+
+// Mutable "Format" attribute; an empty entry is created if none exists.
+inline std::string& BaseEntityType::Format(void)
+{
+    return Attribute("Format");
+}
+
+// Sets the "Format" attribute; returns *this so calls can be chained.
+inline BaseEntityType& BaseEntityType::Format(const std::string& format)
+{
+    Attribute("Format", format);
+    return *this;
+}
+
+// Read-only "ModifiedAt" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& BaseEntityType::ModifiedAt(void) const
+{
+    return Attribute("ModifiedAt");
+}
+
+// Mutable "ModifiedAt" attribute; an empty entry is created if none exists.
+inline std::string& BaseEntityType::ModifiedAt(void)
+{
+    return Attribute("ModifiedAt");
+}
+
+// Sets the "ModifiedAt" attribute; returns *this so calls can be chained.
+inline BaseEntityType& BaseEntityType::ModifiedAt(const std::string& modifiedAt)
+{
+    Attribute("ModifiedAt", modifiedAt);
+    return *this;
+}
+
+// Read-only "Name" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& BaseEntityType::Name(void) const
+{
+    return Attribute("Name");
+}
+
+// Mutable "Name" attribute; an empty entry is created if none exists.
+inline std::string& BaseEntityType::Name(void)
+{
+    return Attribute("Name");
+}
+
+// Sets the "Name" attribute; returns *this so calls can be chained.
+inline BaseEntityType& BaseEntityType::Name(const std::string& name)
+{
+    Attribute("Name", name);
+    return *this;
+}
+
+// Read-only "ResourceId" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& BaseEntityType::ResourceId(void) const
+{
+    return Attribute("ResourceId");
+}
+
+// Mutable "ResourceId" attribute; an empty entry is created if none exists.
+inline std::string& BaseEntityType::ResourceId(void)
+{
+    return Attribute("ResourceId");
+}
+
+// Sets the "ResourceId" attribute; returns *this so calls can be chained.
+inline BaseEntityType& BaseEntityType::ResourceId(const std::string& resourceId)
+{
+    Attribute("ResourceId", resourceId);
+    return *this;
+}
+
+// Read-only "Tags" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& BaseEntityType::Tags(void) const
+{
+    return Attribute("Tags");
+}
+
+// Mutable "Tags" attribute; an empty entry is created if none exists.
+inline std::string& BaseEntityType::Tags(void)
+{
+    return Attribute("Tags");
+}
+
+// Sets the "Tags" attribute; returns *this so calls can be chained.
+inline BaseEntityType& BaseEntityType::Tags(const std::string& tags)
+{
+    Attribute("Tags", tags);
+    return *this;
+}
+
+// Read-only "Version" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& BaseEntityType::Version(void) const
+{
+    return Attribute("Version");
+}
+
+// Mutable "Version" attribute; an empty entry is created if none exists.
+inline std::string& BaseEntityType::Version(void)
+{
+    return Attribute("Version");
+}
+
+// Sets the "Version" attribute; returns *this so calls can be chained.
+inline BaseEntityType& BaseEntityType::Version(const std::string& version)
+{
+    Attribute("Version", version);
+    return *this;
+}
+
+// ----------------
+// DataEntityType
+// ----------------
+
+// Read-only text of the "Checksum" child element; the const ChildText()
+// lookup returns SharedNullString() when there is no such child.
+inline const std::string& DataEntityType::Checksum(void) const
+{
+    return ChildText("Checksum");
+}
+
+// Mutable text of the "Checksum" child element; the child is added if missing.
+inline std::string& DataEntityType::Checksum(void)
+{
+    return ChildText("Checksum");
+}
+
+// Sets the text of the "Checksum" child element (adding the child if needed);
+// returns *this so calls can be chained.
+inline DataEntityType& DataEntityType::Checksum(const std::string& checksum)
+{
+    ChildText("Checksum", checksum);
+    return *this;
+}
+
+// Read-only text of the "EncodedValue" child element; the const ChildText()
+// lookup returns SharedNullString() when there is no such child.
+inline const std::string& DataEntityType::EncodedValue(void) const
+{
+    return ChildText("EncodedValue");
+}
+
+// Mutable text of the "EncodedValue" child element; the child is added if missing.
+inline std::string& DataEntityType::EncodedValue(void)
+{
+    return ChildText("EncodedValue");
+}
+
+// Sets the text of the "EncodedValue" child element (adding the child if
+// needed); returns *this so calls can be chained.
+inline DataEntityType& DataEntityType::EncodedValue(const std::string& encodedValue)
+{
+    ChildText("EncodedValue", encodedValue);
+    return *this;
+}
+
+// Read-only "MetaType" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& DataEntityType::MetaType(void) const
+{
+    return Attribute("MetaType");
+}
+
+// Mutable "MetaType" attribute; an empty entry is created if none exists.
+inline std::string& DataEntityType::MetaType(void)
+{
+    return Attribute("MetaType");
+}
+
+// Sets the "MetaType" attribute; returns *this so calls can be chained.
+inline DataEntityType& DataEntityType::MetaType(const std::string& metatype)
+{
+    Attribute("MetaType", metatype);
+    return *this;
+}
+
+// Read-only "SimpleValue" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& DataEntityType::SimpleValue(void) const
+{
+    return Attribute("SimpleValue");
+}
+
+// Mutable "SimpleValue" attribute; an empty entry is created if none exists.
+inline std::string& DataEntityType::SimpleValue(void)
+{
+    return Attribute("SimpleValue");
+}
+
+// Sets the "SimpleValue" attribute; returns *this so calls can be chained.
+inline DataEntityType& DataEntityType::SimpleValue(const std::string& simpleValue)
+{
+    Attribute("SimpleValue", simpleValue);
+    return *this;
+}
+
+// Read-only "TimeStampedName" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& DataEntityType::TimeStampedName(void) const
+{
+    return Attribute("TimeStampedName");
+}
+
+// Mutable "TimeStampedName" attribute; an empty entry is created if none exists.
+inline std::string& DataEntityType::TimeStampedName(void)
+{
+    return Attribute("TimeStampedName");
+}
+
+// Sets the "TimeStampedName" attribute; returns *this so calls can be chained.
+inline DataEntityType& DataEntityType::TimeStampedName(const std::string& timeStampedName)
+{
+    Attribute("TimeStampedName", timeStampedName);
+    return *this;
+}
+
+// Read-only "UniqueId" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& DataEntityType::UniqueId(void) const
+{
+    return Attribute("UniqueId");
+}
+
+// Mutable "UniqueId" attribute; an empty entry is created if none exists.
+inline std::string& DataEntityType::UniqueId(void)
+{
+    return Attribute("UniqueId");
+}
+
+// Sets the "UniqueId" attribute; returns *this so calls can be chained.
+inline DataEntityType& DataEntityType::UniqueId(const std::string& uuid)
+{
+    Attribute("UniqueId", uuid);
+    return *this;
+}
+
+// Read-only "ValueDataType" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& DataEntityType::ValueDataType(void) const
+{
+    return Attribute("ValueDataType");
+}
+
+// Mutable "ValueDataType" attribute; an empty entry is created if none exists.
+inline std::string& DataEntityType::ValueDataType(void)
+{
+    return Attribute("ValueDataType");
+}
+
+// Sets the "ValueDataType" attribute; returns *this so calls can be chained.
+inline DataEntityType& DataEntityType::ValueDataType(const std::string& valueDataType)
+{
+    Attribute("ValueDataType", valueDataType);
+    return *this;
+}
+
+// ----------------
+// StrictEntityType
+// ----------------
+
+// Read-only "MetaType" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& StrictEntityType::MetaType(void) const
+{
+    return Attribute("MetaType");
+}
+
+// Mutable "MetaType" attribute; an empty entry is created if none exists.
+inline std::string& StrictEntityType::MetaType(void)
+{
+    return Attribute("MetaType");
+}
+
+// Sets the "MetaType" attribute; returns *this so calls can be chained.
+inline StrictEntityType& StrictEntityType::MetaType(const std::string& metatype)
+{
+    Attribute("MetaType", metatype);
+    return *this;
+}
+
+// Read-only "TimeStampedName" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& StrictEntityType::TimeStampedName(void) const
+{
+    return Attribute("TimeStampedName");
+}
+
+// Mutable "TimeStampedName" attribute; an empty entry is created if none exists.
+inline std::string& StrictEntityType::TimeStampedName(void)
+{
+    return Attribute("TimeStampedName");
+}
+
+// Sets the "TimeStampedName" attribute; returns *this so calls can be chained.
+inline StrictEntityType& StrictEntityType::TimeStampedName(const std::string& timeStampedName)
+{
+    Attribute("TimeStampedName", timeStampedName);
+    return *this;
+}
+
+// Read-only "UniqueId" attribute; the const Attribute() lookup returns
+// SharedNullString() when the attribute is absent.
+inline const std::string& StrictEntityType::UniqueId(void) const
+{
+    return Attribute("UniqueId");
+}
+
+// Mutable "UniqueId" attribute; an empty entry is created if none exists.
+inline std::string& StrictEntityType::UniqueId(void)
+{
+    return Attribute("UniqueId");
+}
+
+// Sets the "UniqueId" attribute; returns *this so calls can be chained.
+inline StrictEntityType& StrictEntityType::UniqueId(const std::string& uuid)
+{
+    Attribute("UniqueId", uuid);
+    return *this;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef DATASETELEMENT_H
+#define DATASETELEMENT_H
+
+#include "pbbam/DataSetXsd.h"
+
+#include <boost/utility/string_ref.hpp>
+#include <algorithm>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <vector>
+#include <cassert>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// An XML element name stored as one qualified string, plus the sizes/offset
+// needed to view its prefix and local-name parts without copying.
+//
+//          qualified name
+//                |
+//         --------------
+//        <pbns:node_name >
+//         ---- ---------
+//          |       |
+//       prefix  local name
+//
+class XmlName
+{
+public:
+    // Splits `fullName` at its first ':' into prefix and local name.
+    XmlName(const std::string& fullName, bool verbatim = false);
+
+    // Assembles the qualified name from separate local-name and prefix parts.
+    XmlName(const std::string& localName, const std::string& prefix);
+
+    XmlName(const XmlName& other);
+    XmlName(XmlName&& other);
+    XmlName& operator=(const XmlName& other);
+    XmlName& operator=(XmlName&& other);
+    ~XmlName(void);
+
+public:
+    // Comparison
+    bool operator==(const XmlName& other) const;
+    bool operator!=(const XmlName& other) const;
+
+public:
+    // Accessors. LocalName() and Prefix() return views into the stored
+    // qualified name rather than copies.
+    const boost::string_ref LocalName(void) const;
+    const boost::string_ref Prefix(void) const;
+    const std::string& QualifiedName(void) const;
+    bool Verbatim(void) const;
+
+private:
+    std::string qualifiedName_;   // full name, prefix and colon included
+    size_t prefixSize_;           // length of the prefix part (0 if none)
+    size_t localNameOffset_;      // index where the local name starts
+    size_t localNameSize_;        // length of the local-name part
+    bool verbatim_;               // flag reported by Verbatim()
+};
+
+// Empty tag type: selects the DataSetElement constructor overload that takes
+// a FromInputXml argument.
+struct FromInputXml
+{ };
+
+// Generic node of the dataset XML tree. A node owns a label (XmlName), a
+// schema tag (XsdType), a text payload, a name->value attribute map, and an
+// ordered list of child nodes.
+class DataSetElement
+{
+public:
+    // --- construction, copy/move, destruction ---
+    DataSetElement(const std::string& label, const XsdType& xsd = XsdType::NONE);
+    DataSetElement(const std::string& label,
+                   const FromInputXml& fromInputXml,
+                   const XsdType& xsd = XsdType::NONE);
+    DataSetElement(const DataSetElement& other);
+    DataSetElement(DataSetElement&& other);
+    DataSetElement& operator=(const DataSetElement& other);
+    DataSetElement& operator=(DataSetElement&& other);
+    virtual ~DataSetElement(void);
+
+public:
+    // --- comparison ---
+    bool operator==(const DataSetElement& other) const;
+    bool operator!=(const DataSetElement& other) const;
+
+public:
+    // --- label & schema ---
+    const boost::string_ref LocalNameLabel(void) const;
+    const boost::string_ref PrefixLabel(void) const;
+    const std::string& QualifiedNameLabel(void) const;
+    bool IsVerbatimLabel(void) const;
+    void Label(const std::string& label);
+    const XsdType& Xsd(void) const;
+
+public:
+    // --- text payload ---
+    const std::string& Text(void) const;
+    std::string& Text(void);
+    void Text(const std::string& text);
+
+public:
+    // --- attributes ---
+    const std::string& Attribute(const std::string& name) const;
+    std::string& Attribute(const std::string& name);
+    void Attribute(const std::string& name, const std::string& value);
+    const std::map<std::string, std::string>& Attributes(void) const;
+    std::map<std::string, std::string>& Attributes(void);
+    bool HasAttribute(const std::string& name) const;
+    size_t NumAttributes(void) const;
+
+public:
+    // --- children ---
+    const std::vector<DataSetElement>& Children(void) const;
+    std::vector<DataSetElement>& Children(void);
+    bool HasChild(const std::string& label) const;
+    size_t NumChildren(void) const;
+    void AddChild(const DataSetElement& e);
+    void RemoveChild(const DataSetElement& e);
+
+    // Typed child access by position or by label. T is applied with an
+    // unchecked static_cast, so the caller chooses a suitable T.
+    template<typename T>
+    const T& Child(size_t index) const;
+
+    template<typename T>
+    T& Child(size_t index);
+
+    template<typename T>
+    const T& Child(const std::string& label) const;
+
+    template<typename T>
+    T& Child(const std::string& label);
+
+    template<typename T>
+    const T& operator[](size_t index) const;
+
+    template<typename T>
+    T& operator[](size_t index);
+
+    template<typename T = DataSetElement>
+    const T& operator[](const std::string& label) const;
+
+    template<typename T = DataSetElement>
+    T& operator[](const std::string& label);
+
+public:
+    // --- text of a named child ---
+    const std::string& ChildText(const std::string& label) const;
+    std::string& ChildText(const std::string& label);
+    void ChildText(const std::string& label, const std::string& text);
+
+protected:
+    // String returned by the const lookups when the requested item is absent.
+    static const std::string& SharedNullString(void);
+
+protected:
+    XsdType xsd_;
+    XmlName label_;
+    std::string text_;
+    std::map<std::string, std::string> attributes_;
+    std::vector<DataSetElement> children_;
+
+private:
+    // Position of the first child matching `label`, or -1 when none matches.
+    int IndexOf(const std::string& label) const;
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/DataSetElement.inl"
+
+#endif // DATASETELEMENT_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "pbbam/internal/DataSetElement.h"
+
+#include <iostream>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// ----------------
+// DataSetElement
+// ----------------
+
+// Creates an element labelled `label` and tagged with schema `xsd`.
+// Text, attributes and children are default-constructed (empty).
+inline DataSetElement::DataSetElement(const std::string& label,
+                                      const XsdType& xsd)
+    : xsd_(xsd)
+    , label_(label)
+{
+}
+
+// Tag-dispatched overload: same as the (label, xsd) constructor, except that
+// the label is built with XmlName's verbatim flag set to true. The
+// FromInputXml argument carries no data and is deliberately unnamed.
+inline DataSetElement::DataSetElement(const std::string& label,
+                                      const FromInputXml&,
+                                      const XsdType& xsd)
+    : xsd_(xsd)
+    , label_(label, true)
+{
+}
+
+// Copy/move construction and assignment are plain member-wise operations over
+// xsd_, label_, text_, attributes_ and children_ (in that order), so the
+// compiler-generated definitions are requested explicitly.
+inline DataSetElement::DataSetElement(const DataSetElement& other) = default;
+
+inline DataSetElement::DataSetElement(DataSetElement&& other) = default;
+
+inline DataSetElement& DataSetElement::operator=(const DataSetElement& other) = default;
+
+inline DataSetElement& DataSetElement::operator=(DataSetElement&& other) = default;
+
+// Nothing to release beyond what the members clean up themselves.
+inline DataSetElement::~DataSetElement(void) = default;
+
+// Deep equality: schema tag, label, text, attributes and the whole child
+// subtree must all match.
+inline bool DataSetElement::operator==(const DataSetElement& other) const
+{
+    // Scalar/string fields are checked before the containers.
+    if (!(xsd_ == other.xsd_ && label_ == other.label_ && text_ == other.text_))
+        return false;
+    return attributes_ == other.attributes_ && children_ == other.children_;
+}
+
+// Negation of operator==.
+inline bool DataSetElement::operator!=(const DataSetElement& other) const
+{
+    return !(*this == other);
+}
+
+// Subscript by position (const): shorthand for Child<T>(index).
+template<typename T>
+const T& DataSetElement::operator[](size_t index) const
+{
+    return Child<T>(index);
+}
+
+// Subscript by position (mutable): shorthand for Child<T>(index).
+template<typename T>
+T& DataSetElement::operator[](size_t index)
+{
+    return Child<T>(index);
+}
+
+// Subscript by label (const): shorthand for Child<T>(label).
+template<typename T>
+const T& DataSetElement::operator[](const std::string& label) const
+{
+    return Child<T>(label);
+}
+
+// Subscript by label (mutable): shorthand for Child<T>(label), which adds the
+// child when no match exists.
+template<typename T>
+T& DataSetElement::operator[](const std::string& label)
+{
+    return Child<T>(label);
+}
+
+// Appends a copy of `e` at the end of the child list.
+inline void DataSetElement::AddChild(const DataSetElement& e)
+{
+    children_.push_back(e);
+}
+
+// Mutable attribute access. Uses map::operator[], so asking for a missing
+// attribute inserts it with an empty value.
+inline std::string& DataSetElement::Attribute(const std::string& name)
+{
+    return attributes_[name];
+}
+
+// Read-only attribute access. Never inserts: a missing attribute yields
+// SharedNullString() instead.
+inline const std::string& DataSetElement::Attribute(const std::string& name) const
+{
+    const auto found = attributes_.find(name);
+    return (found != attributes_.cend()) ? found->second : SharedNullString();
+}
+
+// Stores `value` under `name`, replacing any previous value.
+inline void DataSetElement::Attribute(const std::string& name, const std::string& value)
+{
+    attributes_[name] = value;
+}
+
+// Read-only view of the whole attribute map.
+inline const std::map<std::string, std::string>& DataSetElement::Attributes(void) const
+{
+    return attributes_;
+}
+
+// Mutable view of the whole attribute map.
+inline std::map<std::string, std::string>& DataSetElement::Attributes(void)
+{
+    return attributes_;
+}
+
+// Child at position `index`, viewed as T (const). vector::at() performs the
+// bounds check and throws on a bad index; the cast to T is an unchecked
+// static_cast.
+template<typename T>
+inline const T& DataSetElement::Child(size_t index) const
+{
+    const DataSetElement& node = children_.at(index);
+    return static_cast<const T&>(node);
+}
+
+// Child at position `index`, viewed as T (mutable). Same bounds checking and
+// unchecked cast as the const overload.
+template<typename T>
+inline T& DataSetElement::Child(size_t index)
+{
+    DataSetElement& node = children_.at(index);
+    return static_cast<T&>(node);
+}
+
+// Child with the given label, viewed as T (const).
+//
+// Throws std::out_of_range when no child matches `label`. Previously the -1
+// returned by IndexOf() was converted to a huge size_t and left for
+// vector::at() to reject; the check is now explicit, keeps the same exception
+// type, and names the missing label in the message.
+template<typename T>
+inline const T& DataSetElement::Child(const std::string& label) const
+{
+    const int i = IndexOf(label);
+    if (i < 0)
+        throw std::out_of_range("DataSetElement: no child element named '" + label + "'");
+    return Child<T>(static_cast<size_t>(i));
+}
+
+// Child with the given label, viewed as T (mutable). When no child matches,
+// a new plain DataSetElement(label) is appended and returned instead.
+template<typename T>
+inline T& DataSetElement::Child(const std::string& label)
+{
+    const int found = IndexOf(label);
+    if (found < 0) {
+        // not present yet: create it at the back and hand that one out
+        AddChild(DataSetElement(label));
+        return Child<T>(NumChildren()-1);
+    }
+    assert(static_cast<size_t>(found) < NumChildren());
+    return Child<T>(found);
+}
+
+// Read-only view of the child list.
+inline const std::vector<DataSetElement>& DataSetElement::Children(void) const
+{
+    return children_;
+}
+
+// Mutable view of the child list.
+inline std::vector<DataSetElement>& DataSetElement::Children(void)
+{
+    return children_;
+}
+
+// Text of the child labelled `label` (const). Returns SharedNullString() when
+// there is no such child; never modifies the element.
+//
+// The child list is scanned once; the earlier HasChild()-then-Child() form
+// ran the same linear search twice.
+inline const std::string& DataSetElement::ChildText(const std::string& label) const
+{
+    const int i = IndexOf(label);
+    if (i < 0)
+        return SharedNullString();
+    return children_[static_cast<size_t>(i)].Text();
+}
+
+// Mutable text of the child labelled `label`; the child is created (with
+// empty text) when it does not exist yet.
+//
+// The non-const Child<>() lookup already appends a DataSetElement(label) when
+// nothing matches, so the separate HasChild()/AddChild() step - and its extra
+// linear scans - is not needed.
+inline std::string& DataSetElement::ChildText(const std::string& label)
+{
+    return Child<DataSetElement>(label).Text();
+}
+
+// True when an attribute called `name` is present.
+inline bool DataSetElement::HasAttribute(const std::string& name) const
+{
+    return attributes_.count(name) != 0;
+}
+
+// True when some child matches `label` (see IndexOf for the matching rule).
+inline bool DataSetElement::HasChild(const std::string& label) const
+{
+    return IndexOf(label) >= 0;
+}
+
+// Position of the first child whose local name OR fully qualified name equals
+// `label`; -1 when no child matches.
+//
+// The qualified-name test compares strings directly. Writing it as
+// `child.label_ == label` went through XmlName's converting constructor and
+// built a temporary XmlName (string copy plus ':' search) for every child
+// visited, only to compare the very same qualified-name strings.
+inline int DataSetElement::IndexOf(const std::string& label) const
+{
+    const size_t count = children_.size();
+    for (size_t i = 0; i < count; ++i) {
+        const DataSetElement& child = children_[i];
+        if (child.LocalNameLabel() == label || child.QualifiedNameLabel() == label)
+            return static_cast<int>(i);   // explicit: the API reports positions as int
+    }
+    return -1;
+}
+
+// Local-name part of this element's label (a view, not a copy).
+inline const boost::string_ref DataSetElement::LocalNameLabel(void) const
+{
+    return label_.LocalName();
+}
+
+// Prefix part of this element's label (a view, not a copy).
+inline const boost::string_ref DataSetElement::PrefixLabel(void) const
+{
+    return label_.Prefix();
+}
+
+// Full qualified label, prefix included.
+inline const std::string& DataSetElement::QualifiedNameLabel(void) const
+{
+    return label_.QualifiedName();
+}
+
+//inline std::string& DataSetElement::Label(void)
+//{ return label_.QualifiedName(); }
+
+// Replaces the label with a new XmlName built from `label`, with the
+// verbatim flag set.
+inline void DataSetElement::Label(const std::string& label)
+{
+    label_ = XmlName(label, true);
+}
+
+// Number of attributes stored on this element.
+inline size_t DataSetElement::NumAttributes(void) const
+{
+    return attributes_.size();
+}
+
+// Number of direct children of this element.
+inline size_t DataSetElement::NumChildren(void) const
+{
+    return children_.size();
+}
+
+// Removes every direct child that compares equal to `e`.
+//
+// `e` is copied before the erase-remove pass because callers may pass a
+// reference to one of our own children (e.g. RemoveChild(Children()[i])).
+// std::remove shifts later elements forward over removed ones, so an aliasing
+// reference would start referring to a different value part-way through and
+// the wrong children could be dropped or kept.
+inline void DataSetElement::RemoveChild(const DataSetElement& e)
+{
+    const DataSetElement target(e);
+    children_.erase(
+        std::remove(children_.begin(),
+                    children_.end(),
+                    target),
+        children_.end()
+    );
+}
+
+// Sets the text of the child labelled `label`, creating that child first if
+// it does not exist.
+//
+// The non-const Child<>() lookup already appends a DataSetElement(label) when
+// nothing matches, so one lookup covers both the "create" and the "update"
+// case; the earlier HasChild() pre-check repeated the linear scan.
+inline void DataSetElement::ChildText(const std::string& label,
+                                      const std::string& text)
+{
+    Child<DataSetElement>(label).Text(text);
+}
+
+// Reports the verbatim flag of this element's label.
+inline bool DataSetElement::IsVerbatimLabel(void) const
+{
+    return label_.Verbatim();
+}
+
+// Read-only access to the element's text payload.
+inline const std::string& DataSetElement::Text(void) const
+{
+    return text_;
+}
+
+// Mutable access to the element's text payload.
+inline std::string& DataSetElement::Text(void)
+{
+    return text_;
+}
+
+// Replaces the element's text payload with `text`.
+inline void DataSetElement::Text(const std::string& text)
+{
+    text_ = text;
+}
+
+// Schema tag this element was constructed with.
+inline const XsdType& DataSetElement::Xsd(void) const
+{
+    return xsd_;
+}
+
+// ----------------
+// XmlName
+// ----------------
+
+// Parses `fullName` as "[prefix:]localName", splitting at the first ':'.
+// A name without a colon, or one that starts with a colon, is treated as
+// having no prefix: the whole string is then the local name.
+inline XmlName::XmlName(const std::string& fullName, bool verbatim)
+    : qualifiedName_(fullName)
+    , prefixSize_(0)
+    , localNameOffset_(0)
+    , localNameSize_(0)
+    , verbatim_(verbatim)
+{
+    const size_t colonPos = qualifiedName_.find(':');
+    const bool hasPrefix = (colonPos != std::string::npos) && (colonPos != 0);
+
+    if (hasPrefix) {
+        prefixSize_      = colonPos;
+        localNameOffset_ = colonPos + 1;   // local name starts right after the colon
+        localNameSize_   = qualifiedName_.size() - localNameOffset_;
+    } else {
+        localNameSize_ = qualifiedName_.size();
+    }
+}
+
+// Builds the qualified name "prefix:localName" from its parts, or just
+// "localName" when `prefix` is empty. The verbatim flag is always set.
+inline XmlName::XmlName(const std::string& localName,
+                        const std::string& prefix)
+    : prefixSize_(prefix.size())
+    , localNameOffset_(prefix.empty() ? 0 : prefix.size() + 1)   // +1 skips the colon
+    , localNameSize_(localName.size())
+    , verbatim_(true)
+{
+    qualifiedName_.reserve(localNameSize_+ prefixSize_ + 1);
+    if (!prefix.empty()) {
+        qualifiedName_.append(prefix);
+        qualifiedName_.append(1, ':');
+    }
+    qualifiedName_.append(localName);
+}
+
+// Copy/move construction and assignment are plain member-wise operations over
+// qualifiedName_, prefixSize_, localNameOffset_, localNameSize_ and
+// verbatim_, so the compiler-generated definitions are requested explicitly.
+inline XmlName::XmlName(const XmlName& other) = default;
+
+inline XmlName::XmlName(XmlName&& other) = default;
+
+inline XmlName& XmlName::operator=(const XmlName& other) = default;
+
+inline XmlName& XmlName::operator=(XmlName&& other) = default;
+
+// Nothing to release beyond what the members clean up themselves.
+inline XmlName::~XmlName(void) = default;
+
+// Two names are equal when their qualified strings match; the verbatim flag
+// does not take part in the comparison.
+inline bool XmlName::operator==(const XmlName& other) const
+{
+    return qualifiedName_ == other.qualifiedName_;
+}
+
+// Negation of operator==.
+inline bool XmlName::operator!=(const XmlName& other) const
+{
+    return !(*this == other);
+}
+
+// View of the local-name part. It points into qualifiedName_, so it is only
+// usable while that string is alive and unchanged.
+inline const boost::string_ref XmlName::LocalName(void) const
+{
+    const char* const begin = qualifiedName_.data() + localNameOffset_;
+    return boost::string_ref(begin, localNameSize_);
+}
+
+// View of the prefix part (empty when there is no prefix). It points into
+// qualifiedName_, with the same lifetime caveat as LocalName().
+inline const boost::string_ref XmlName::Prefix(void) const
+{
+    const char* const begin = qualifiedName_.data();
+    return boost::string_ref(begin, prefixSize_);
+}
+
+// Full stored name, prefix and colon included.
+inline const std::string& XmlName::QualifiedName(void) const
+{
+    return qualifiedName_;
+}
+
+// Flag supplied at construction: the `verbatim` argument, or true for names
+// built from separate (localName, prefix) parts.
+inline bool XmlName::Verbatim(void) const
+{
+    return verbatim_;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef DATASETLISTELEMENT_H
+#define DATASETLISTELEMENT_H
+
+#include "pbbam/internal/DataSetElement.h"
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+//
+// adds iterators for convenience
+//
+// Forward declaration: the iterators keep a pointer back to the list they walk.
+template<typename T> class DataSetListElement;
+
+/// \brief State and comparison shared by the mutable and const list iterators.
+///
+/// Stores the list being iterated (parent_) and the current position (index_).
+/// Construction and advancing are protected, so they are reachable only from
+/// derived iterator classes.
+template<typename T>
+class DataSetListIteratorBase
+{
+public:
+    bool operator==(const DataSetListIteratorBase<T>& other) const;
+    bool operator!=(const DataSetListIteratorBase<T>& other) const;
+
+protected:
+    /// \param parent  list element this iterator refers to
+    /// \param i       starting position
+    DataSetListIteratorBase(const DataSetListElement<T>* parent, size_t i);
+
+    /// Advances the iterator by one position.
+    void ReadNext();
+
+    const DataSetListElement<T>* parent_;  ///< list being iterated
+    size_t index_;                         ///< current position within the list
+};
+
+/// \brief Iterator over a DataSetListElement that hands out mutable
+///        references/pointers to its elements of type \p T.
+template<typename T>
+class DataSetListIterator : public DataSetListIteratorBase<T>
+{
+public:
+    /// \param parent  list element to iterate over
+    /// \param i       starting position
+    DataSetListIterator(const DataSetListElement<T>* parent, size_t i);
+
+    // element access
+    T& operator*();
+    T* operator->();
+
+    // pre-increment / post-increment
+    DataSetListIterator<T>& operator++();
+    DataSetListIterator<T> operator++(int);
+};
+
+/// \brief Iterator over a DataSetListElement that hands out read-only
+///        references/pointers to its elements of type \p T.
+template<typename T>
+class DataSetListConstIterator : public DataSetListIteratorBase<T>
+{
+public:
+    /// \param parent  list element to iterate over
+    /// \param i       starting position
+    DataSetListConstIterator(const DataSetListElement<T>* parent, size_t i);
+
+    // element access
+    const T& operator*() const;
+    const T* operator->() const;
+
+    // pre-increment / post-increment
+    DataSetListConstIterator<T>& operator++();
+    DataSetListConstIterator<T> operator++(int);
+};
+
+/// \brief A DataSetElement whose children are exposed as a list of \p T,
+///        reachable by index or through begin()/end() style iterators.
+template<typename T>
+class DataSetListElement : public DataSetElement
+{
+public:
+    /// \param label  passed on to the DataSetElement base
+    /// \param xsd    passed on to the DataSetElement base (XsdType::NONE if omitted)
+    DataSetListElement(const std::string& label, const XsdType& xsd = XsdType::NONE);
+
+    // ---- child access through index ----
+
+    const T& operator[](size_t index) const;
+    T& operator[](size_t index);
+    size_t Size() const;
+
+    // ---- child access through iterators ----
+
+    DataSetListIterator<T> begin();
+    DataSetListConstIterator<T> begin() const;
+    DataSetListConstIterator<T> cbegin() const;
+    DataSetListIterator<T> end();
+    DataSetListConstIterator<T> end() const;
+    DataSetListConstIterator<T> cend() const;
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/DataSetListElement.inl"
+
+#endif // DATASETLISTELEMENT_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "pbbam/internal/DataSetListElement.h"
+#include <cassert>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// --------------------
+// DataSetListElement
+// --------------------
+
+/// Forwards \p label and \p xsd unchanged to the DataSetElement base.
+template<typename T>
+inline DataSetListElement<T>::DataSetListElement(const std::string& label, const XsdType& xsd)
+    : DataSetElement(label, xsd)
+{
+}
+
+/// Read-only access to the child at \p index, looked up with children_.at()
+/// and viewed as \p T.
+template<typename T>
+inline const T& DataSetListElement<T>::operator[](size_t index) const
+{
+    const auto& child = children_.at(index);
+    return static_cast<const T&>(child);
+}
+
+/// Mutable access to the child at \p index, looked up with children_.at()
+/// and viewed as \p T.
+template<typename T>
+inline T& DataSetListElement<T>::operator[](size_t index)
+{
+    auto& child = children_.at(index);
+    return static_cast<T&>(child);
+}
+
+/// \returns the number of list entries, i.e. whatever NumChildren() reports.
+template<typename T>
+inline size_t DataSetListElement<T>::Size() const
+{
+    return NumChildren();
+}
+
+/// \returns a mutable iterator positioned at index 0 of this list.
+template<typename T>
+inline DataSetListIterator<T> DataSetListElement<T>::begin()
+{
+    const size_t firstIndex = 0;
+    return DataSetListIterator<T>(this, firstIndex);
+}
+
+/// \returns a const iterator positioned at index 0 of this list.
+template<typename T>
+inline DataSetListConstIterator<T> DataSetListElement<T>::begin() const
+{
+    const size_t firstIndex = 0;
+    return DataSetListConstIterator<T>(this, firstIndex);
+}
+
+/// \returns a const iterator positioned at index 0 of this list (same result
+///          as the const overload of begin()).
+template<typename T>
+inline DataSetListConstIterator<T> DataSetListElement<T>::cbegin() const
+{
+    const size_t firstIndex = 0;
+    return DataSetListConstIterator<T>(this, firstIndex);
+}
+
+/// \returns a mutable iterator positioned one past the last child
+///          (index == NumChildren()).
+template<typename T>
+inline DataSetListIterator<T> DataSetListElement<T>::end()
+{
+    const size_t pastLast = NumChildren();
+    return DataSetListIterator<T>(this, pastLast);
+}
+
+/// \returns a const iterator positioned one past the last child
+///          (index == NumChildren()).
+template<typename T>
+inline DataSetListConstIterator<T> DataSetListElement<T>::end() const
+{
+    const size_t pastLast = NumChildren();
+    return DataSetListConstIterator<T>(this, pastLast);
+}
+
+/// \returns a const iterator positioned one past the last child (same result
+///          as the const overload of end()).
+template<typename T>
+inline DataSetListConstIterator<T> DataSetListElement<T>::cend() const
+{
+    const size_t pastLast = NumChildren();
+    return DataSetListConstIterator<T>(this, pastLast);
+}
+
+// -------------------------
+// DataSetListIteratorBase
+// -------------------------
+
+/// Two iterators compare equal when they refer to the same parent list and
+/// sit at the same index.
+template<typename T>
+inline bool DataSetListIteratorBase<T>::operator==(const DataSetListIteratorBase<T>& other) const
+{
+    if (parent_ != other.parent_)
+        return false;
+    return index_ == other.index_;
+}
+
+/// Exact negation of operator==.
+template<typename T>
+inline bool DataSetListIteratorBase<T>::operator!=(const DataSetListIteratorBase<T>& other) const
+{
+    return !operator==(other);
+}
+
+/// Stores the list to iterate over and the starting position; no validation
+/// of either argument is done here.
+template<typename T>
+inline DataSetListIteratorBase<T>::DataSetListIteratorBase(const DataSetListElement<T>* parent,
+                                                           size_t i)
+    : parent_(parent), index_(i)
+{
+}
+
+/// Advances the iterator by one position.
+///
+/// - While the index is still inside the list it is simply incremented, so
+///   stepping off the last child yields an iterator equal to end().
+/// - Stepping an iterator that is already at (or beyond) the end detaches it
+///   from its list by clearing parent_.
+/// - A detached iterator (null parent_) is left untouched. The original code
+///   dereferenced parent_ unconditionally, so advancing a detached or
+///   null-constructed iterator was a null-pointer dereference.
+template<class T>
+inline void DataSetListIteratorBase<T>::ReadNext(void)
+{
+    // Nothing to advance over once the iterator has no list.
+    if (parent_ == nullptr)
+        return;
+
+    if (index_ >= parent_->NumChildren()) {
+        parent_ = nullptr;
+        return;
+    }
+    ++index_;
+}
+
+// ---------------------
+// DataSetListIterator
+// ---------------------
+
+/// Passes \p parent and \p i straight through to the shared iterator base.
+template<typename T>
+inline DataSetListIterator<T>::DataSetListIterator(const DataSetListElement<T>* parent,
+                                                   size_t i)
+    : DataSetListIteratorBase<T>(parent, i)
+{
+}
+
+/// \returns the child at the current index of the parent list, fetched with
+///          Child<T>().
+///
+/// NOTE(review): parent_ is a pointer-to-const while this returns a mutable
+/// T&. Whether that is well-formed on instantiation depends on the Child<T>
+/// overloads of DataSetElement, which are declared elsewhere - confirm.
+template<typename T>
+inline T& DataSetListIterator<T>::operator*()
+{
+    return this->parent_->template Child<T>(this->index_);
+}
+
+/// \returns the address of the element that operator*() yields.
+template<typename T>
+inline T* DataSetListIterator<T>::operator->()
+{
+    T& current = operator*();
+    return &current;
+}
+
+/// Pre-increment: advances via the base class ReadNext() and returns this
+/// iterator.
+template<typename T>
+inline DataSetListIterator<T>& DataSetListIterator<T>::operator++()
+{
+    this->ReadNext();
+    return *this;
+}
+
+/// Post-increment: advances this iterator and returns a copy taken before the
+/// advance.
+template<typename T>
+inline DataSetListIterator<T> DataSetListIterator<T>::operator++(int)
+{
+    DataSetListIterator<T> previous(*this);
+    operator++();
+    return previous;
+}
+
+// --------------------------
+// DataSetListConstIterator
+// --------------------------
+
+/// Passes \p parent and \p i straight through to the shared iterator base.
+template<typename T>
+inline DataSetListConstIterator<T>::DataSetListConstIterator(const DataSetListElement<T>* parent,
+                                                             size_t i)
+    : DataSetListIteratorBase<T>(parent, i)
+{
+}
+
+/// \returns read-only access to the child at the current index of the parent
+///          list, fetched with Child<T>().
+template<typename T>
+inline const T& DataSetListConstIterator<T>::operator*() const
+{
+    return this->parent_->template Child<T>(this->index_);
+}
+
+/// \returns the address of the element that operator*() yields.
+template<typename T>
+inline const T* DataSetListConstIterator<T>::operator->() const
+{
+    const T& current = operator*();
+    return &current;
+}
+
+/// Pre-increment: advances via the base class ReadNext() and returns this
+/// iterator.
+template<typename T>
+inline DataSetListConstIterator<T>& DataSetListConstIterator<T>::operator++()
+{
+    this->ReadNext();
+    return *this;
+}
+
+/// Post-increment: advances this iterator and returns a copy taken before the
+/// advance.
+template<typename T>
+inline DataSetListConstIterator<T> DataSetListConstIterator<T>::operator++(int)
+{
+    DataSetListConstIterator<T> previous(*this);
+    operator++();
+    return previous;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file DataSetTypes.inl
+/// \brief Inline implementations for the public DataSet component classes.
+//
+// Author: Derek Barnett
+
+#include "pbbam/DataSetTypes.h"
+
+namespace PacBio {
+namespace BAM {
+
+// -------------
+// DataSetBase
+// -------------
+
+/// \returns read-only access to the namespace registry held by this dataset.
+inline const NamespaceRegistry& DataSetBase::Namespaces() const
+{
+    return registry_;
+}
+
+/// \returns mutable access to the namespace registry held by this dataset.
+inline NamespaceRegistry& DataSetBase::Namespaces()
+{
+    return registry_;
+}
+
+// -----------------
+// DataSetMetadata
+// -----------------
+
+/// \returns text of the "NumRecords" child element (read-only).
+inline const std::string& DataSetMetadata::NumRecords() const
+{
+    return ChildText("NumRecords");
+}
+
+/// \returns text of the "NumRecords" child element (mutable).
+inline std::string& DataSetMetadata::NumRecords()
+{
+    return ChildText("NumRecords");
+}
+
+/// Sets the text of the "NumRecords" child element.
+/// \returns reference to this object, so calls can be chained
+inline DataSetMetadata& DataSetMetadata::NumRecords(const std::string& numRecords)
+{
+    ChildText("NumRecords", numRecords);
+    return *this;
+}
+
+/// \returns text of the "TotalLength" child element (read-only).
+inline const std::string& DataSetMetadata::TotalLength() const
+{
+    return ChildText("TotalLength");
+}
+
+/// \returns text of the "TotalLength" child element (mutable).
+inline std::string& DataSetMetadata::TotalLength()
+{
+    return ChildText("TotalLength");
+}
+
+/// Sets the text of the "TotalLength" child element.
+/// \returns reference to this object, so calls can be chained
+inline DataSetMetadata& DataSetMetadata::TotalLength(const std::string& totalLength)
+{
+    ChildText("TotalLength", totalLength);
+    return *this;
+}
+
+// ----------
+// Property
+// ----------
+
+/// \returns value of the "Name" attribute (read-only).
+inline const std::string& Property::Name() const
+{
+    return Attribute("Name");
+}
+
+/// \returns value of the "Name" attribute (mutable).
+inline std::string& Property::Name()
+{
+    return Attribute("Name");
+}
+
+/// Sets the "Name" attribute.
+/// \returns reference to this object, so calls can be chained
+inline Property& Property::Name(const std::string& name)
+{
+    Attribute("Name", name);
+    return *this;
+}
+
+/// \returns value of the "Operator" attribute (read-only).
+inline const std::string& Property::Operator() const
+{
+    return Attribute("Operator");
+}
+
+/// \returns value of the "Operator" attribute (mutable).
+inline std::string& Property::Operator()
+{
+    return Attribute("Operator");
+}
+
+/// Sets the "Operator" attribute.
+/// \returns reference to this object, so calls can be chained
+inline Property& Property::Operator(const std::string& op)
+{
+    Attribute("Operator", op);
+    return *this;
+}
+
+/// \returns value of the "Value" attribute (read-only).
+inline const std::string& Property::Value() const
+{
+    return Attribute("Value");
+}
+
+/// \returns value of the "Value" attribute (mutable).
+inline std::string& Property::Value()
+{
+    return Attribute("Value");
+}
+
+/// Sets the "Value" attribute.
+/// \returns reference to this object, so calls can be chained
+inline Property& Property::Value(const std::string& value)
+{
+    Attribute("Value", value);
+    return *this;
+}
+
+// ------------
+// Provenance
+// ------------
+
+/// \returns value of the "CreatedBy" attribute (read-only).
+inline const std::string& Provenance::CreatedBy() const
+{
+    return Attribute("CreatedBy");
+}
+
+/// \returns value of the "CreatedBy" attribute (mutable).
+inline std::string& Provenance::CreatedBy()
+{
+    return Attribute("CreatedBy");
+}
+
+/// Sets the "CreatedBy" attribute.
+/// \returns reference to this object, so calls can be chained
+inline Provenance& Provenance::CreatedBy(const std::string& createdBy)
+{
+    Attribute("CreatedBy", createdBy);
+    return *this;
+}
+
+/// \returns text of the "CommonServicesInstanceId" child element (read-only).
+inline const std::string& Provenance::CommonServicesInstanceId() const
+{
+    return ChildText("CommonServicesInstanceId");
+}
+
+/// \returns text of the "CommonServicesInstanceId" child element (mutable).
+inline std::string& Provenance::CommonServicesInstanceId()
+{
+    return ChildText("CommonServicesInstanceId");
+}
+
+/// Sets the text of the "CommonServicesInstanceId" child element.
+/// \returns reference to this object, so calls can be chained
+inline Provenance& Provenance::CommonServicesInstanceId(const std::string& id)
+{
+    ChildText("CommonServicesInstanceId", id);
+    return *this;
+}
+
+/// \returns text of the "CreatorUserId" child element (read-only).
+inline const std::string& Provenance::CreatorUserId() const
+{
+    return ChildText("CreatorUserId");
+}
+
+/// \returns text of the "CreatorUserId" child element (mutable).
+inline std::string& Provenance::CreatorUserId()
+{
+    return ChildText("CreatorUserId");
+}
+
+/// Sets the text of the "CreatorUserId" child element.
+/// \returns reference to this object, so calls can be chained
+inline Provenance& Provenance::CreatorUserId(const std::string& id)
+{
+    ChildText("CreatorUserId", id);
+    return *this;
+}
+
+/// \returns text of the "ParentJobId" child element (read-only).
+inline const std::string& Provenance::ParentJobId() const
+{
+    return ChildText("ParentJobId");
+}
+
+/// \returns text of the "ParentJobId" child element (mutable).
+inline std::string& Provenance::ParentJobId()
+{
+    return ChildText("ParentJobId");
+}
+
+/// Sets the text of the "ParentJobId" child element.
+/// \returns reference to this object, so calls can be chained
+inline Provenance& Provenance::ParentJobId(const std::string& id)
+{
+    ChildText("ParentJobId", id);
+    return *this;
+}
+
+/// Assigns \p tool to whatever the no-argument ParentTool() accessor yields.
+/// \returns reference to this object, so calls can be chained
+inline Provenance& Provenance::ParentTool(const PacBio::BAM::ParentTool& tool)
+{
+    ParentTool() = tool;
+    return *this;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file FastaSequence.inl
+/// \brief Inline implementations for the FastaSequence class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/FastaSequence.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// Builds a FASTA record from a name and its bases. Both arguments are taken
+/// by value and moved into the members.
+inline FastaSequence::FastaSequence(std::string name, std::string bases)
+    : name_{std::move(name)}, bases_{std::move(bases)}
+{
+}
+
+/// \returns a copy of the stored bases.
+inline std::string FastaSequence::Bases() const
+{
+    return bases_;
+}
+
+/// \returns a copy of the stored name.
+inline std::string FastaSequence::Name() const
+{
+    return name_;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Frames.inl
+/// \brief Inline implementations for the Frames class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Frames.h"
+
+namespace PacBio {
+namespace BAM {
+
+// ---- data access ----
+
+/// \returns read-only access to the stored frame values.
+inline const std::vector<uint16_t>& Frames::Data() const
+{
+    return data_;
+}
+
+/// \returns mutable access to the stored frame values.
+inline std::vector<uint16_t>& Frames::DataRaw()
+{
+    return data_;
+}
+
+/// \returns the result of the one-argument Frames::Encode overload applied to
+///          the stored frame values.
+inline std::vector<uint8_t> Frames::Encode() const
+{
+    return Frames::Encode(data_);
+}
+
+/// Replaces the stored frame values with a copy of \p frames.
+/// \returns reference to this object, so calls can be chained
+inline Frames& Frames::Data(const std::vector<uint16_t>& frames)
+{
+    data_ = frames;
+    return *this;
+}
+
+/// Replaces the stored frame values by moving from \p frames.
+/// \returns reference to this object, so calls can be chained
+inline Frames& Frames::Data(std::vector<uint16_t>&& frames)
+{
+    data_ = std::move(frames);
+    return *this;
+}
+
+// ---- iteration (forwarded to the underlying vector) ----
+
+inline std::vector<uint16_t>::const_iterator Frames::begin() const
+{
+    return data_.begin();
+}
+
+inline std::vector<uint16_t>::iterator Frames::begin()
+{
+    return data_.begin();
+}
+
+inline std::vector<uint16_t>::const_iterator Frames::cbegin() const
+{
+    return data_.cbegin();
+}
+
+inline std::vector<uint16_t>::const_iterator Frames::cend() const
+{
+    return data_.cend();
+}
+
+inline std::vector<uint16_t>::const_iterator Frames::end() const
+{
+    return data_.end();
+}
+
+inline std::vector<uint16_t>::iterator Frames::end()
+{
+    return data_.end();
+}
+
+// ---- size queries (forwarded to the underlying vector) ----
+
+/// \returns number of stored frame values.
+inline size_t Frames::size() const
+{
+    return data_.size();
+}
+
+/// \returns true when no frame values are stored.
+inline bool Frames::empty() const
+{
+    return data_.empty();
+}
+
+// ---- comparison ----
+
+/// Two Frames objects are equal when their stored values are equal.
+inline bool Frames::operator==(const Frames& other) const
+{
+    return data_ == other.data_;
+}
+
+/// Exact negation of operator==.
+inline bool Frames::operator!=(const Frames& other) const
+{
+    return !operator==(other);
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file GenomicInterval.inl
+/// \brief Inline implementations for the GenomicInterval class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/GenomicInterval.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// Destructor; the body is intentionally empty.
+inline GenomicInterval::~GenomicInterval()
+{
+}
+
+/// \returns a copy of the stored name.
+inline std::string GenomicInterval::Name() const
+{
+    return name_;
+}
+
+/// Sets the stored name.
+/// \returns reference to this object, so calls can be chained
+inline GenomicInterval& GenomicInterval::Name(const std::string& name)
+{
+    name_ = name;
+    return *this;
+}
+
+/// \returns a copy of the stored interval.
+inline PacBio::BAM::Interval<Position> GenomicInterval::Interval() const
+{
+    return interval_;
+}
+
+/// Replaces the stored interval.
+/// \returns reference to this object, so calls can be chained
+inline GenomicInterval& GenomicInterval::Interval(const PacBio::BAM::Interval<Position>& interval)
+{
+    interval_ = interval;
+    return *this;
+}
+
+/// \returns true only when the name is non-empty, start and stop are both
+///          non-negative, and the stored interval itself reports IsValid().
+inline bool GenomicInterval::IsValid() const
+{
+    // An unnamed interval is never valid.
+    if (name_.empty())
+        return false;
+
+    return interval_.Start() >= 0 && interval_.Stop() >= 0 && interval_.IsValid();
+}
+
+/// \returns the length reported by the stored interval.
+inline size_t GenomicInterval::Length() const
+{
+    return interval_.Length();
+}
+
+/// \returns start position of the stored interval.
+inline Position GenomicInterval::Start() const
+{
+    return interval_.Start();
+}
+
+/// Sets the start position of the stored interval.
+/// \returns reference to this object, so calls can be chained
+inline GenomicInterval& GenomicInterval::Start(const Position start)
+{
+    interval_.Start(start);
+    return *this;
+}
+
+/// \returns stop position of the stored interval.
+inline Position GenomicInterval::Stop() const
+{
+    return interval_.Stop();
+}
+
+/// Sets the stop position of the stored interval.
+/// \returns reference to this object, so calls can be chained
+inline GenomicInterval& GenomicInterval::Stop(const Position stop)
+{
+    interval_.Stop(stop);
+    return *this;
+}
+
+/// Equal when both the names and the intervals match.
+inline bool GenomicInterval::operator==(const GenomicInterval& other) const
+{
+    if (!(name_ == other.name_))
+        return false;
+    return interval_ == other.interval_;
+}
+
+/// Exact negation of operator==.
+inline bool GenomicInterval::operator!=(const GenomicInterval& other) const
+{
+    return !operator==(other);
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Interval.inl
+/// \brief Inline implementations for the Interval class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Interval.h"
+
+namespace PacBio {
+namespace BAM {
+
+// ---- construction ----
+
+/// Default: the right-open interval [0, 0).
+template<typename T>
+inline Interval<T>::Interval()
+    : data_(boost::icl::discrete_interval<T>::right_open(0, 0))
+{
+}
+
+/// Single-position interval: right-open [val, val+1).
+template<typename T>
+inline Interval<T>::Interval(const T val)
+    : data_(boost::icl::discrete_interval<T>::right_open(val, val+1))
+{
+}
+
+/// Right-open interval [start, stop).
+template<typename T>
+inline Interval<T>::Interval(const T start, const T stop)
+    : data_(boost::icl::discrete_interval<T>::right_open(start, stop))
+{
+}
+
+/// Copy: rebuilds a right-open interval from the start/stop of \p other.
+template<typename T>
+inline Interval<T>::Interval(const Interval<T>& other)
+    : data_(boost::icl::discrete_interval<T>::right_open(other.Start(), other.Stop()))
+{
+}
+
+// ---- comparison ----
+
+/// Equal when the underlying boost::icl intervals compare equal.
+template<typename T>
+inline bool Interval<T>::operator==(const Interval<T>& other) const
+{
+    return data_ == other.data_;
+}
+
+/// Exact negation of operator==.
+template<typename T>
+inline bool Interval<T>::operator!=(const Interval<T>& other) const
+{
+    return !operator==(other);
+}
+
+// ---- relations to another interval (forwarded to boost::icl) ----
+
+/// \returns boost::icl::within(this, other)
+template<typename T>
+inline bool Interval<T>::CoveredBy(const Interval<T>& other) const
+{
+    return boost::icl::within(data_, other.data_);
+}
+
+/// \returns boost::icl::contains(this, other)
+template<typename T>
+inline bool Interval<T>::Covers(const Interval<T>& other) const
+{
+    return boost::icl::contains(data_, other.data_);
+}
+
+/// \returns boost::icl::intersects(this, other)
+template<typename T>
+inline bool Interval<T>::Intersects(const Interval<T>& other) const
+{
+    return boost::icl::intersects(data_, other.data_);
+}
+
+// ---- properties ----
+
+/// \returns true when the underlying interval is not empty.
+template<typename T>
+inline bool Interval<T>::IsValid() const
+{
+    const bool isEmpty = boost::icl::is_empty(data_);
+    return !isEmpty;
+}
+
+/// \returns boost::icl::length() of the underlying interval.
+template<typename T>
+inline size_t Interval<T>::Length() const
+{
+    return boost::icl::length(data_);
+}
+
+/// \returns lower bound of the interval.
+template<typename T>
+inline T Interval<T>::Start() const
+{
+    return data_.lower();
+}
+
+/// Replaces the lower bound, keeping the current upper bound.
+/// \returns reference to this object, so calls can be chained
+template<typename T>
+inline Interval<T>& Interval<T>::Start(const T& start)
+{
+    const T currentStop = data_.upper();
+    data_ = boost::icl::discrete_interval<T>::right_open(start, currentStop);
+    return *this;
+}
+
+/// \returns upper bound of the interval.
+template<typename T>
+inline T Interval<T>::Stop() const
+{
+    return data_.upper();
+}
+
+/// Replaces the upper bound, keeping the current lower bound.
+/// \returns reference to this object, so calls can be chained
+template<typename T>
+inline Interval<T>& Interval<T>::Stop(const T& stop)
+{
+    const T currentStart = data_.lower();
+    data_ = boost::icl::discrete_interval<T>::right_open(currentStart, stop);
+    return *this;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiBasicTypes.inl
+/// \brief Inline implementations for the basic data structures used in PBI lookups.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiBasicTypes.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// Default block: first index 0, zero reads, virtual offset -1.
+inline IndexResultBlock::IndexResultBlock()
+    : firstIndex_(0), numReads_(0), virtualOffset_(-1)
+{
+}
+
+/// Block starting at \p idx and spanning \p numReads reads; the virtual
+/// offset is initialized to -1.
+inline IndexResultBlock::IndexResultBlock(size_t idx, size_t numReads)
+    : firstIndex_(idx), numReads_(numReads), virtualOffset_(-1)
+{
+}
+
+/// Blocks are equal when first index, read count and virtual offset all match.
+inline bool IndexResultBlock::operator==(const IndexResultBlock& other) const
+{
+    if (firstIndex_ != other.firstIndex_)
+        return false;
+    if (numReads_ != other.numReads_)
+        return false;
+    return virtualOffset_ == other.virtualOffset_;
+}
+
+/// Exact negation of operator==.
+inline bool IndexResultBlock::operator!=(const IndexResultBlock& other) const
+{
+    return !operator==(other);
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilter.inl
+/// \brief Inline implementations for the PbiFilter class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiFilter.h"
+#include <algorithm>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <set>
+#include <stdexcept>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \internal
+///
+/// This class wraps a basic PBI filter (whether a property filter or some operator,
+/// e.g. union, intersect, etc.). The wrapper allows PbiFilters to hold heterogeneous,
+/// recursive filter types - without exposing pointers & worrying about memory ownership
+/// issues between client & library.
+///
+/// Filters can be given by value from client code and we will wrap them for composition.
+///
+/// \code{.cpp}
+///     PbiFilter f1(PbiZmwFilter(42));
+///     PbiFilter f2;
+///     f2.Add(PbiQueryLengthFilter(3000, Compare::GREATER_THAN_EQUAL));
+///     f2.Add(MyApplicationCustomFilter("foo"));
+///     PbiFilter intersect = PbiFilter::Intersection({ f1, f2 });
+///     ...
+/// \endcode
+///
+struct FilterWrapper
+{
+public:
+    /// Takes the filter \p x by value and stores it behind WrapperInterface.
+    template<typename T> FilterWrapper(T x);
+
+    FilterWrapper(const FilterWrapper& other);
+    FilterWrapper(FilterWrapper&&) noexcept = default;
+    FilterWrapper& operator=(const FilterWrapper& other);
+    FilterWrapper& operator=(FilterWrapper&&) noexcept = default;
+    ~FilterWrapper(void);
+
+public:
+    /// \returns the wrapped filter's verdict for record \p row of index \p idx
+    bool Accepts(const PacBio::BAM::PbiRawData& idx, const size_t row) const;
+
+private:
+    /// Type-erased interface every wrapped filter is accessed through.
+    struct WrapperInterface
+    {
+        virtual ~WrapperInterface(void) = default;
+
+        /// \returns a newly allocated copy; the caller takes ownership
+        virtual WrapperInterface* Clone(void) const =0;
+
+        virtual bool Accepts(const PacBio::BAM::PbiRawData& idx,
+                             const size_t row) const =0;
+    };
+
+    /// Concrete holder for a filter of type T.
+    template<typename T>
+    struct WrapperImpl : public WrapperInterface
+    {
+        WrapperImpl(T x);
+        WrapperImpl(const WrapperImpl& other);
+
+        // 'override' makes the compiler reject any drift from the interface
+        // signatures instead of silently declaring a new virtual function
+        WrapperInterface* Clone(void) const override;
+        bool Accepts(const PacBio::BAM::PbiRawData& idx, const size_t row) const override;
+
+        T data_;
+    };
+
+private:
+    std::unique_ptr<WrapperInterface> self_;  // sole owner of the wrapped filter
+};
+
+// ---------------
+// FilterWrapper
+// ---------------
+
+template<typename T>
+inline FilterWrapper::FilterWrapper(T x) // wraps any filter type T, taken by value
+ : self_(new WrapperImpl<T>(std::move(x))) // x is moved into a heap-allocated WrapperImpl<T> owned by self_
+{ }
+
+/// Copy constructor: clones the filter held by \p other.
+///
+/// The defaulted move operations leave their source with an empty self_, so
+/// \p other may legitimately hold nothing. In that case the copy is empty as
+/// well, rather than dereferencing a null pointer.
+inline FilterWrapper::FilterWrapper(const FilterWrapper& other)
+    : self_(other.self_ ? other.self_->Clone() : nullptr)
+{ }
+
+/// Copy assignment: replaces the held filter with a clone of \p other's.
+///
+/// The clone is created before the current filter is released, so
+/// self-assignment is safe. A moved-from \p other (empty self_) yields an
+/// empty wrapper instead of a null dereference.
+inline FilterWrapper& FilterWrapper::operator=(const FilterWrapper& other)
+{
+    self_.reset(other.self_ ? other.self_->Clone() : nullptr);
+    return *this;
+}
+
+inline FilterWrapper::~FilterWrapper(void) { } // empty body: the unique_ptr member self_ releases the wrapped filter
+
+inline bool FilterWrapper::Accepts(const PbiRawData& idx, const size_t row) const // verdict for record 'row' of 'idx'
+{ return self_->Accepts(idx, row); } // forwards to the wrapped filter; self_ is dereferenced unchecked
+
+// ----------------
+// WrapperImpl<T>
+// ----------------
+
+template<typename T>
+inline FilterWrapper::WrapperImpl<T>::WrapperImpl(T x) // holder for a filter of type T, taken by value
+ : FilterWrapper::WrapperInterface()
+ , data_(std::move(x)) // the by-value argument is moved into the stored filter
+{
+ BOOST_CONCEPT_ASSERT((PbiFilterConcept<T>)); // compile-time check that T models PbiFilterConcept
+}
+
+template<typename T>
+inline FilterWrapper::WrapperImpl<T>::WrapperImpl(const WrapperImpl& other) // copy constructor, used by Clone()
+ : FilterWrapper::WrapperInterface()
+ , data_(other.data_) // copies the stored filter via T's copy constructor
+{ }
+
+template<typename T>
+inline FilterWrapper::WrapperInterface* FilterWrapper::WrapperImpl<T>::Clone(void) const // polymorphic copy
+{ return new WrapperImpl(*this); } // raw owning pointer: the caller must take ownership (FilterWrapper stores it in a unique_ptr)
+
+template<typename T>
+inline bool FilterWrapper::WrapperImpl<T>::Accepts(const PbiRawData& idx,
+ const size_t row) const
+{ return data_.Accepts(idx, row); } // delegates to the concrete filter's own Accepts()
+
+/// \internal
+///
+/// Implementation data for PbiFilter: a composition type plus the list of
+/// (type-erased) child filters that are combined according to it.
+struct PbiFilterPrivate
+{
+    PbiFilterPrivate(PbiFilter::CompositionType type)
+        : type_(type)
+    { }
+
+    /// Appends \p filter as a new child.
+    ///
+    /// \p filter is a forwarding reference, so it is forwarded rather than
+    /// unconditionally moved: rvalues are moved into the new child, lvalues
+    /// are copied and left intact for the caller.
+    template<typename T>
+    void Add(T&& filter)
+    {
+        filters_.emplace_back(std::forward<T>(filter));
+    }
+
+    /// \returns a new PbiFilterPrivate with the same composition type and a
+    ///          copy of every child filter
+    std::unique_ptr<internal::PbiFilterPrivate> DeepCopy(void) const
+    {
+        auto copy = std::unique_ptr<PbiFilterPrivate>{ new PbiFilterPrivate{type_} };
+        copy->filters_ = this->filters_;
+        return copy;
+    }
+
+    /// Applies the composite filter to record \p row of \p idx.
+    ///
+    /// \returns true if there are no child filters; otherwise true when all
+    ///          children accept (INTERSECT) or when any child accepts (UNION)
+    /// \throws std::runtime_error if the composition type is neither of those
+    bool Accepts(const PbiRawData& idx, const size_t row) const
+    {
+        // no filter -> accepts every record
+        if (filters_.empty())
+            return true;
+
+        switch (type_) {
+
+            // intersection of child filters
+            case PbiFilter::INTERSECT:
+                for (const auto& filter : filters_) {
+                    if (!filter.Accepts(idx, row))
+                        return false; // break early on failure
+                }
+                return true; // all passed
+
+            // union of child filters
+            case PbiFilter::UNION:
+                for (const auto& filter : filters_) {
+                    if (filter.Accepts(idx, row))
+                        return true; // break early on pass
+                }
+                return false; // none passed
+
+            // invalid composite filter type
+            default:
+                throw std::runtime_error("invalid composite filter type in PbiFilterPrivate::Accepts");
+        }
+    }
+
+    PbiFilter::CompositionType type_;
+    std::vector<FilterWrapper> filters_;
+};
+
+} // namespace internal
+
+inline PbiFilter::PbiFilter(const CompositionType type) // empty composite filter of the requested type
+ : d_{ new internal::PbiFilterPrivate{ type } } // no child filters are added here
+{ }
+
+template<typename T> inline
+PbiFilter::PbiFilter(const T& filter) // builds a filter holding one child: a copy of 'filter'
+ : d_{ new internal::PbiFilterPrivate{ PbiFilter::INTERSECT } } // single-filter wrappers use INTERSECT composition
+{
+ Add(filter); // the const& Add overload copies before storing
+}
+
+/// Creates an INTERSECT filter whose single child is \p filter.
+///
+/// \p filter is a forwarding reference: for a non-const lvalue argument T is
+/// deduced as an lvalue reference and this overload is preferred over the
+/// const& ones - including over the copy constructor when the argument is a
+/// non-const PbiFilter. Unconditionally moving would therefore gut an object
+/// the caller still owns, so lvalues are routed to the copying Add() overloads
+/// and only true rvalues are moved from.
+template<typename T> inline
+PbiFilter::PbiFilter(T&& filter)
+    : d_{ new internal::PbiFilterPrivate{ PbiFilter::INTERSECT } }
+{
+    typedef typename std::remove_reference<T>::type FilterType;
+
+    if (std::is_lvalue_reference<T>::value) {
+        // bind as const so overload resolution selects a copying Add()
+        const FilterType& source = filter;
+        Add(source);
+    }
+    else
+        Add(std::move(filter));
+}
+
+inline PbiFilter::PbiFilter(const std::vector<PbiFilter>& filters) // INTERSECT filter over copies of 'filters'
+ : d_{ new internal::PbiFilterPrivate{ PbiFilter::INTERSECT } }
+{
+ Add(filters); // const& overload: copies the vector, then adds each element
+}
+
+inline PbiFilter::PbiFilter(std::vector<PbiFilter>&& filters) // INTERSECT filter taking over 'filters'
+ : d_{ new internal::PbiFilterPrivate{ PbiFilter::INTERSECT} }
+{
+ Add(std::move(filters)); // each element is moved into a child filter
+}
+
+/// Copy constructor: deep-copies \p other's composition type and child filters.
+///
+/// A moved-from \p other has an empty d_; copying it yields another empty-d_
+/// filter instead of dereferencing a null pointer.
+inline PbiFilter::PbiFilter(const PbiFilter& other)
+    : d_{ other.d_ ? other.d_->DeepCopy()
+                   : std::unique_ptr<internal::PbiFilterPrivate>{ } }
+{ }
+
+inline PbiFilter::PbiFilter(PbiFilter&& other) noexcept // move constructor
+ : d_{ std::move(other.d_) } // steals the implementation pointer; other.d_ is left null
+{ }
+
+/// Copy assignment: replaces this filter's contents with a deep copy of \p other.
+///
+/// The copy is made before the current implementation is released, so
+/// self-assignment is safe. A moved-from \p other (empty d_) leaves this
+/// filter with an empty d_ as well, instead of dereferencing a null pointer.
+inline PbiFilter& PbiFilter::operator=(const PbiFilter& other)
+{
+    if (other.d_)
+        d_ = other.d_->DeepCopy();
+    else
+        d_.reset();
+    return *this;
+}
+
+inline PbiFilter& PbiFilter::operator=(PbiFilter&& other) noexcept // move assignment
+{
+ d_ = std::move(other.d_); // takes over other's implementation; other.d_ is left null
+ return *this;
+}
+
+inline PbiFilter::~PbiFilter(void) { } // empty body; d_ cleans up through its own destructor
+
+inline bool PbiFilter::Accepts(const PacBio::BAM::PbiRawData& idx,
+ const size_t row) const
+{ return d_->Accepts(idx, row); } // delegates to PbiFilterPrivate::Accepts; d_ is dereferenced unchecked
+
+template<typename T>
+inline PbiFilter& PbiFilter::Add(const T& filter) // adds a copy of 'filter' as a child; returns *this
+{
+ T copy = filter; // the caller's object stays untouched
+ return Add(std::move(copy)); // hand the local copy to the rvalue overload
+}
+
+/// Adds \p filter as a child of this composite filter.
+///
+/// \p filter is a forwarding reference: a non-const lvalue argument deduces T
+/// as an lvalue reference and selects this overload in preference to the
+/// const& ones. Moving unconditionally would gut an object the caller still
+/// owns, so lvalues are re-dispatched to the copying overload and only true
+/// rvalues are moved from.
+///
+/// \returns reference to this filter, to allow chaining
+template<typename T>
+inline PbiFilter& PbiFilter::Add(T&& filter)
+{
+    typedef typename std::remove_reference<T>::type FilterType;
+
+    if (std::is_lvalue_reference<T>::value) {
+        // bind as const so overload resolution selects a copying Add()
+        const FilterType& source = filter;
+        return Add(source);
+    }
+
+    d_->Add(std::move(filter));
+    return *this;
+}
+
+inline PbiFilter& PbiFilter::Add(const PbiFilter& filter) // nests a copy of 'filter' as a child; returns *this
+{
+ PbiFilter copy = filter; // copy construction, so the argument is left intact
+ return Add(std::move(copy));
+}
+
+inline PbiFilter& PbiFilter::Add(PbiFilter&& filter) // nests 'filter' itself as a child; returns *this
+{
+ d_->Add(std::move(filter)); // the whole PbiFilter is wrapped as one child; 'filter' is moved from
+ return *this;
+}
+
+inline PbiFilter& PbiFilter::Add(const std::vector<PbiFilter>& filters) // adds copies of all 'filters'; returns *this
+{
+ std::vector<PbiFilter> copy = filters; // copy first so the caller's vector is untouched
+ return Add(std::move(copy));
+}
+
+inline PbiFilter& PbiFilter::Add(std::vector<PbiFilter>&& filters) // adds every element as its own child; returns *this
+{
+ for (auto&& filter : filters)
+ d_->Add(std::move(filter)); // elements are moved from; 'filters' keeps its size but holds moved-from objects
+ return *this;
+}
+
+inline bool PbiFilter::IsEmpty(void) const // true when no child filters have been added
+{ return d_->filters_.empty(); }
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilterTypes.inl
+/// \brief Inline implementations for the built-in PBI filters.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiFilterTypes.h"
+#include <cassert>
+#include <stdexcept>
+
+namespace PacBio {
+namespace BAM {
+
+namespace internal {
+
+template <typename T>
+inline FilterBase<T>::FilterBase(const T& value, const Compare::Type cmp) // single-value mode (copy)
+ : value_(value)
+ , cmp_(cmp) // multiValue_ is not set here, so CompareHelper() takes the single-value path
+{ }
+
+template <typename T>
+inline FilterBase<T>::FilterBase(T&& value, const Compare::Type cmp) // single-value mode (move)
+ : value_(std::move(value))
+ , cmp_(cmp)
+{ }
+
+/// Whitelist constructor (copy): a value is accepted when it equals any entry
+/// of \p values (see CompareMultiHelper()).
+///
+/// value_ and cmp_ play no role in whitelist mode, but they are still given
+/// defined values so that copying the filter never reads indeterminate members.
+/// NOTE(review): initializer order assumes the members are declared as
+/// value_, multiValue_, cmp_ - adjust if the header differs.
+template <typename T>
+inline FilterBase<T>::FilterBase(const std::vector<T>& values)
+    : value_()
+    , multiValue_(values)
+    , cmp_(Compare::EQUAL)
+{ }
+
+/// Whitelist constructor (move): same as the copying overload, but takes over
+/// the storage of \p values.
+template <typename T>
+inline FilterBase<T>::FilterBase(std::vector<T>&& values)
+    : value_()
+    , multiValue_(std::move(values))
+    , cmp_(Compare::EQUAL)
+{ }
+
+/// Tests \p lhs against this filter's criteria: the whitelist when one was
+/// provided, otherwise the single value_/cmp_ comparison.
+template<typename T>
+inline bool FilterBase<T>::CompareHelper(const T& lhs) const
+{
+    const bool hasWhitelist = (multiValue_ != boost::none);
+    if (hasWhitelist)
+        return CompareMultiHelper(lhs);
+    return CompareSingleHelper(lhs);
+}
+
+/// Whitelist test: \returns true as soon as any whitelisted value equals
+/// \p lhs, false when none does (including an empty whitelist).
+template<typename T>
+inline bool FilterBase<T>::CompareMultiHelper(const T& lhs) const
+{
+    for (const auto& allowed : multiValue_.get()) {
+        if (allowed == lhs)
+            return true;   // exact match found
+    }
+    return false;          // no matches
+}
+
+/// Single-value test: evaluates "lhs <cmp_> value_".
+///
+/// \throws std::runtime_error for any comparison type other than the six
+///         relational ones handled here (after asserting in debug builds)
+template<typename T>
+inline bool FilterBase<T>::CompareSingleHelper(const T& lhs) const
+{
+    if (cmp_ == Compare::EQUAL)              return lhs == value_;
+    if (cmp_ == Compare::LESS_THAN)          return lhs < value_;
+    if (cmp_ == Compare::LESS_THAN_EQUAL)    return lhs <= value_;
+    if (cmp_ == Compare::GREATER_THAN)       return lhs > value_;
+    if (cmp_ == Compare::GREATER_THAN_EQUAL) return lhs >= value_;
+    if (cmp_ == Compare::NOT_EQUAL)          return lhs != value_;
+
+    // anything else is not supported by the generic helper
+    assert(false);
+    throw std::runtime_error("unsupported compare type requested");
+}
+
+/// LocalContextFlags specialization: supports the six relational comparisons
+/// plus the bitwise CONTAINS / NOT_CONTAINS tests.
+///
+/// \throws std::runtime_error for any other comparison type (after asserting
+///         in debug builds)
+template<>
+inline bool FilterBase<LocalContextFlags>::CompareSingleHelper(const LocalContextFlags& lhs) const
+{
+    if (cmp_ == Compare::EQUAL)              return lhs == value_;
+    if (cmp_ == Compare::LESS_THAN)          return lhs < value_;
+    if (cmp_ == Compare::LESS_THAN_EQUAL)    return lhs <= value_;
+    if (cmp_ == Compare::GREATER_THAN)       return lhs > value_;
+    if (cmp_ == Compare::GREATER_THAN_EQUAL) return lhs >= value_;
+    if (cmp_ == Compare::NOT_EQUAL)          return lhs != value_;
+
+    // flag tests: does lhs share at least one / no set bit with value_?
+    if (cmp_ == Compare::CONTAINS)           return ((lhs & value_) != 0);
+    if (cmp_ == Compare::NOT_CONTAINS)       return ((lhs & value_) == 0);
+
+    assert(false);
+    throw std::runtime_error("unsupported compare type requested");
+}
+
+// BarcodeDataFilterBase
+
+template<typename T, BarcodeLookupData::Field field>
+inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(const T& value, const Compare::Type cmp)
+ : FilterBase<T>(value, cmp) // single value + comparison, copied into the base
+{ }
+
+template<typename T, BarcodeLookupData::Field field>
+inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(T&& value, const Compare::Type cmp)
+ : FilterBase<T>(std::move(value), cmp) // single value + comparison, moved into the base
+{ }
+
+template<typename T, BarcodeLookupData::Field field>
+inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(const std::vector<T>& values)
+ : FilterBase<T>(values) // whitelist mode, values copied
+{ }
+
+template<typename T, BarcodeLookupData::Field field>
+inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(std::vector<T>&& values)
+ : FilterBase<T>(std::move(values)) // whitelist mode, values moved
+{ }
+
+/// Applies the filter to record \p row of the index's barcode section, reading
+/// the column selected by the \c field template argument.
+///
+/// \throws std::out_of_range (from vector::at) if \p row is out of bounds
+/// \throws std::runtime_error if \c field is not a supported barcode column
+template<typename T, BarcodeLookupData::Field field>
+inline bool BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase::Accepts(const PbiRawData& idx,
+                                                                           const size_t row) const
+{
+    const PbiRawBarcodeData& barcodes = idx.BarcodeData();
+
+    if (field == BarcodeLookupData::BC_FORWARD)
+        return FilterBase<T>::CompareHelper(barcodes.bcForward_.at(row));
+    if (field == BarcodeLookupData::BC_REVERSE)
+        return FilterBase<T>::CompareHelper(barcodes.bcReverse_.at(row));
+    if (field == BarcodeLookupData::BC_QUALITY)
+        return FilterBase<T>::CompareHelper(barcodes.bcQual_.at(row));
+
+    assert(false);
+    throw std::runtime_error("unsupported BarcodeData field requested");
+}
+
+// BasicDataFilterBase
+
+template<typename T, BasicLookupData::Field field>
+inline BasicDataFilterBase<T, field>::BasicDataFilterBase(const T& value, const Compare::Type cmp)
+ : FilterBase<T>(value, cmp) // single value + comparison, copied into the base
+{ }
+
+template<typename T, BasicLookupData::Field field>
+inline BasicDataFilterBase<T, field>::BasicDataFilterBase(T&& value, const Compare::Type cmp)
+ : FilterBase<T>(std::move(value), cmp) // single value + comparison, moved into the base
+{ }
+
+template<typename T, BasicLookupData::Field field>
+inline BasicDataFilterBase<T, field>::BasicDataFilterBase(const std::vector<T>& values)
+ : FilterBase<T>(values) // whitelist mode, values copied
+{ }
+
+template<typename T, BasicLookupData::Field field>
+inline BasicDataFilterBase<T, field>::BasicDataFilterBase(std::vector<T>&& values)
+ : FilterBase<T>(std::move(values)) // whitelist mode, values moved
+{ }
+
+/// Applies the filter to record \p row of the index's basic-data section,
+/// reading the column selected by the \c field template argument.
+///
+/// \throws std::out_of_range (from vector::at) if \p row is out of bounds
+/// \throws std::runtime_error if \c field is not handled here
+template<typename T, BasicLookupData::Field field>
+inline bool BasicDataFilterBase<T, field>::BasicDataFilterBase::Accepts(const PbiRawData& idx,
+                                                                       const size_t row) const
+{
+    const PbiRawBasicData& basics = idx.BasicData();
+
+    if (field == BasicLookupData::RG_ID)
+        return FilterBase<T>::CompareHelper(basics.rgId_.at(row));
+    if (field == BasicLookupData::Q_START)
+        return FilterBase<T>::CompareHelper(basics.qStart_.at(row));
+    if (field == BasicLookupData::Q_END)
+        return FilterBase<T>::CompareHelper(basics.qEnd_.at(row));
+    if (field == BasicLookupData::ZMW)
+        return FilterBase<T>::CompareHelper(basics.holeNumber_.at(row));
+    if (field == BasicLookupData::READ_QUALITY)
+        return FilterBase<T>::CompareHelper(basics.readQual_.at(row));
+
+    // BasicLookupData::CONTEXT_FLAG has its own specialization
+    assert(false);
+    throw std::runtime_error("unsupported BasicData field requested");
+}
+
+// Shorthand so that the specialization's signature below stays readable.
+// NOTE(review): identifiers containing a double underscore are reserved in C++;
+// consider renaming once all users of this alias are known.
+typedef BasicDataFilterBase<LocalContextFlags, BasicLookupData::CONTEXT_FLAG> LocalContextFilter__;
+
+/// CONTEXT_FLAG specialization: converts the stored per-record flag value to
+/// LocalContextFlags before running the comparison.
+///
+/// \throws std::out_of_range (from vector::at) if \p row is out of bounds
+template<>
+inline bool LocalContextFilter__::BasicDataFilterBase::Accepts(const PbiRawData& idx,
+                                                               const size_t row) const
+{
+    const PbiRawBasicData& basics = idx.BasicData();
+    const auto& storedFlags = basics.ctxtFlag_.at(row);
+    return FilterBase<LocalContextFlags>::CompareHelper(static_cast<LocalContextFlags>(storedFlags));
+}
+
+template<typename T, MappedLookupData::Field field>
+inline MappedDataFilterBase<T, field>::MappedDataFilterBase(const T& value, const Compare::Type cmp)
+ : FilterBase<T>(value, cmp) // single value + comparison, copied into the base
+{ }
+
+template<typename T, MappedLookupData::Field field>
+inline MappedDataFilterBase<T, field>::MappedDataFilterBase(T&& value, const Compare::Type cmp)
+ : FilterBase<T>(std::move(value), cmp) // single value + comparison, moved into the base
+{ }
+
+template<typename T, MappedLookupData::Field field>
+inline MappedDataFilterBase<T, field>::MappedDataFilterBase(const std::vector<T>& values)
+ : FilterBase<T>(values) // whitelist mode, values copied
+{ }
+
+template<typename T, MappedLookupData::Field field>
+inline MappedDataFilterBase<T, field>::MappedDataFilterBase(std::vector<T>&& values)
+ : FilterBase<T>(std::move(values)) // whitelist mode, values moved
+{ }
+
+/// STRAND specialization: a stored revStrand_ value of 1 is treated as
+/// Strand::REVERSE, anything else as Strand::FORWARD, and the resulting
+/// strand is then compared.
+///
+/// \throws std::out_of_range (from vector::at) if \p row is out of bounds
+template<>
+inline bool MappedDataFilterBase<Strand, MappedLookupData::STRAND>::MappedDataFilterBase::Accepts(const PbiRawData& idx,
+                                                                                                 const size_t row) const
+{
+    const PbiRawMappedData& mapped = idx.MappedData();
+    const bool isReverse = (mapped.revStrand_.at(row) == 1);
+
+    Strand rowStrand = Strand::FORWARD;
+    if (isReverse)
+        rowStrand = Strand::REVERSE;
+
+    return FilterBase<Strand>::CompareHelper(rowStrand);
+}
+
+/// Applies the filter to record \p row of the index's mapped-data section,
+/// reading the column (or derived count) selected by the \c field template
+/// argument.
+///
+/// \throws std::out_of_range (from vector::at) if \p row is out of bounds
+/// \throws std::runtime_error if \c field is not handled here
+template<typename T, MappedLookupData::Field field>
+inline bool MappedDataFilterBase<T, field>::MappedDataFilterBase::Accepts(const PbiRawData& idx,
+                                                                         const size_t row) const
+{
+    const PbiRawMappedData& mapped = idx.MappedData();
+
+    if (field == MappedLookupData::T_ID)
+        return FilterBase<T>::CompareHelper(mapped.tId_.at(row));
+    if (field == MappedLookupData::T_START)
+        return FilterBase<T>::CompareHelper(mapped.tStart_.at(row));
+    if (field == MappedLookupData::T_END)
+        return FilterBase<T>::CompareHelper(mapped.tEnd_.at(row));
+    if (field == MappedLookupData::A_START)
+        return FilterBase<T>::CompareHelper(mapped.aStart_.at(row));
+    if (field == MappedLookupData::A_END)
+        return FilterBase<T>::CompareHelper(mapped.aEnd_.at(row));
+    if (field == MappedLookupData::N_M)
+        return FilterBase<T>::CompareHelper(mapped.nM_.at(row));
+    if (field == MappedLookupData::N_MM)
+        return FilterBase<T>::CompareHelper(mapped.nMM_.at(row));
+
+    // deletion/insertion counts come from helper methods, not stored columns
+    if (field == MappedLookupData::N_DEL)
+        return FilterBase<T>::CompareHelper(mapped.NumDeletedBasesAt(row));
+    if (field == MappedLookupData::N_INS)
+        return FilterBase<T>::CompareHelper(mapped.NumInsertedBasesAt(row));
+
+    if (field == MappedLookupData::MAP_QUALITY)
+        return FilterBase<T>::CompareHelper(mapped.mapQV_.at(row));
+
+    assert(false);
+    throw std::runtime_error("unsupported MappedData field requested");
+}
+
+} // namespace internal
+
+// PbiAlignedEndFilter
+
+inline PbiAlignedEndFilter::PbiAlignedEndFilter(const uint32_t position, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<uint32_t, MappedLookupData::A_END>(position, cmp)
+{ } // rows are tested by comparing their A_END (aEnd_) value against 'position' using 'cmp'
+
+// PbiAlignedLengthFilter
+
+inline PbiAlignedLengthFilter::PbiAlignedLengthFilter(const uint32_t length, const Compare::Type cmp)
+ : internal::FilterBase<uint32_t>(length, cmp) // only stores the threshold & comparison
+{ } // this filter's Accepts() is not defined in this .inl
+
+// PbiAlignedStartFilter
+
+inline PbiAlignedStartFilter::PbiAlignedStartFilter(const uint32_t position, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<uint32_t, MappedLookupData::A_START>(position, cmp)
+{ } // rows are tested by comparing their A_START (aStart_) value against 'position' using 'cmp'
+
+// PbiAlignedStrandFilter
+
+/// Filters on aligned strand. Only equality-style comparisons make sense for
+/// a strand, so anything else is rejected at construction.
+///
+/// \throws std::runtime_error if \p cmp is neither Compare::EQUAL nor
+///         Compare::NOT_EQUAL
+inline PbiAlignedStrandFilter::PbiAlignedStrandFilter(const Strand strand, const Compare::Type cmp)
+    : internal::MappedDataFilterBase<Strand, MappedLookupData::STRAND>(strand, cmp)
+{
+    const bool isSupported = (cmp == Compare::EQUAL) || (cmp == Compare::NOT_EQUAL);
+    if (isSupported)
+        return;
+
+    // unsupported comparison: report which one was requested
+    std::string msg("Compare type: ");
+    msg += Compare::TypeToName(cmp);
+    msg += " not supported for PbiAlignedStrandFilter (use one of Compare::EQUAL or Compare::NOT_EQUAL).";
+    throw std::runtime_error(msg);
+}
+
+// PbiBarcodeFilter
+
+inline PbiBarcodeFilter::PbiBarcodeFilter(const int16_t barcode, const Compare::Type cmp) // forward-or-reverse barcode test
+ : compositeFilter_{ PbiFilter::Union({ PbiBarcodeForwardFilter{barcode,cmp},
+ PbiBarcodeReverseFilter{barcode,cmp}
+ })
+ } // both children receive the same value and comparison
+{ }
+
+inline PbiBarcodeFilter::PbiBarcodeFilter(const std::vector<int16_t>& whitelist) // forward-or-reverse whitelist test
+ : compositeFilter_{ PbiFilter::Union({ PbiBarcodeForwardFilter{whitelist},
+ PbiBarcodeReverseFilter{whitelist}
+ })
+ } // each child takes its own copy of the whitelist
+{ }
+
+/// Whitelist overload that takes over \p whitelist: builds the union of a
+/// forward-barcode and a reverse-barcode whitelist filter.
+///
+/// Both child filters need the complete list, and the elements of a braced
+/// list are evaluated left to right. The forward filter therefore takes a copy
+/// (a named rvalue reference is an lvalue, so its const& constructor is
+/// selected) and only the last use moves from \p whitelist. Moving twice would
+/// hand the reverse filter an already moved-from vector.
+inline PbiBarcodeFilter::PbiBarcodeFilter(std::vector<int16_t>&& whitelist)
+    : compositeFilter_{ PbiFilter::Union({ PbiBarcodeForwardFilter{whitelist},
+                                           PbiBarcodeReverseFilter{std::move(whitelist)}
+                                         })
+                      }
+{ }
+
+inline bool PbiBarcodeFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{ return compositeFilter_.Accepts(idx, row); } // verdict comes from the composite built in the constructor
+
+// PbiBarcodeForwardFilter
+
+inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(const int16_t bcFwdId, const Compare::Type cmp)
+ : internal::BarcodeDataFilterBase<int16_t, BarcodeLookupData::BC_FORWARD>(bcFwdId, cmp)
+{ } // single value: BC_FORWARD (bcForward_) of each row is compared with bcFwdId using cmp
+
+inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(const std::vector<int16_t>& whitelist)
+ : internal::BarcodeDataFilterBase<int16_t, BarcodeLookupData::BC_FORWARD>(whitelist)
+{ } // whitelist mode (copied)
+
+inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(std::vector<int16_t>&& whitelist)
+ : internal::BarcodeDataFilterBase<int16_t, BarcodeLookupData::BC_FORWARD>(std::move(whitelist))
+{ } // whitelist mode (moved)
+
+// PbiBarcodeQualityFilter
+
+inline PbiBarcodeQualityFilter::PbiBarcodeQualityFilter(const uint8_t bcQuality, const Compare::Type cmp)
+ : internal::BarcodeDataFilterBase<uint8_t, BarcodeLookupData::BC_QUALITY>(bcQuality, cmp)
+{ } // BC_QUALITY (bcQual_) of each row is compared with bcQuality using cmp
+
+// PbiBarcodeReverseFilter
+
+inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(const int16_t bcRevId, const Compare::Type cmp)
+ : internal::BarcodeDataFilterBase<int16_t, BarcodeLookupData::BC_REVERSE>(bcRevId, cmp)
+{ } // single value: BC_REVERSE (bcReverse_) of each row is compared with bcRevId using cmp
+
+inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(const std::vector<int16_t>& whitelist)
+ : internal::BarcodeDataFilterBase<int16_t, BarcodeLookupData::BC_REVERSE>(whitelist)
+{ } // whitelist mode (copied)
+
+inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(std::vector<int16_t>&& whitelist)
+ : internal::BarcodeDataFilterBase<int16_t, BarcodeLookupData::BC_REVERSE>(std::move(whitelist))
+{ } // whitelist mode (moved)
+
+// PbiBarcodesFilter
+
+inline PbiBarcodesFilter::PbiBarcodesFilter(const std::pair<int16_t, int16_t> barcodes, const Compare::Type cmp)
+ : PbiBarcodesFilter(barcodes.first, barcodes.second, cmp) // delegates: first = forward, second = reverse
+{ }
+
+inline PbiBarcodesFilter::PbiBarcodesFilter(const int16_t bcForward, const int16_t bcReverse, const Compare::Type cmp)
+ : compositeFilter_{ PbiFilter::Intersection({ PbiBarcodeForwardFilter{bcForward,cmp},
+ PbiBarcodeReverseFilter{bcReverse,cmp}
+ })
+ } // the same cmp is applied to both the forward and the reverse test
+{ }
+
+inline bool PbiBarcodesFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{ return compositeFilter_.Accepts(idx, row); } // verdict comes from the composite built in the constructor
+
+// PbiIdentityFilter
+
+inline PbiIdentityFilter::PbiIdentityFilter(const float identity,
+ const Compare::Type cmp)
+ : internal::FilterBase<float>(identity, cmp) // only stores the threshold & comparison
+{ } // this filter's Accepts() is not defined in this .inl
+
+// PbiLocalContextFilter
+
+inline PbiLocalContextFilter::PbiLocalContextFilter(const LocalContextFlags& flags,
+ const Compare::Type cmp)
+ : internal::BasicDataFilterBase<LocalContextFlags, BasicLookupData::CONTEXT_FLAG>(flags, cmp)
+{ } // uses the CONTEXT_FLAG specialization of Accepts(); CONTAINS / NOT_CONTAINS are valid for cmp here
+
+// PbiMapQualityFilter
+
+inline PbiMapQualityFilter::PbiMapQualityFilter(const uint8_t mapQual, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<uint8_t, MappedLookupData::MAP_QUALITY>(mapQual, cmp)
+{ } // MAP_QUALITY (mapQV_) of each row is compared with mapQual using cmp
+
+// PbiMovieNameFilter
+
+inline bool PbiMovieNameFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{ return compositeFilter_.Accepts(idx, row); } // compositeFilter_ is set up outside this .inl (no constructor here)
+
+// PbiNumDeletedBasesFilter
+
+inline PbiNumDeletedBasesFilter::PbiNumDeletedBasesFilter(const size_t numDeletions, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<size_t, MappedLookupData::N_DEL>(numDeletions, cmp)
+{ } // N_DEL: each row's NumDeletedBasesAt(row) is compared with numDeletions using cmp
+
+// PbiNumInsertedBasesFilter
+
+inline PbiNumInsertedBasesFilter::PbiNumInsertedBasesFilter(const size_t numInsertions, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<size_t, MappedLookupData::N_INS>(numInsertions, cmp)
+{ } // N_INS: each row's NumInsertedBasesAt(row) is compared with numInsertions using cmp
+
+// PbiNumMatchesFilter
+
+inline PbiNumMatchesFilter::PbiNumMatchesFilter(const size_t numMatchedBases, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<size_t, MappedLookupData::N_M>(numMatchedBases, cmp)
+{ } // N_M (nM_) of each row is compared with numMatchedBases using cmp
+
+// PbiNumMismatchesFilter
+
+inline PbiNumMismatchesFilter::PbiNumMismatchesFilter(const size_t numMismatchedBases, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<size_t, MappedLookupData::N_MM>(numMismatchedBases, cmp)
+{ } // N_MM (nMM_) of each row is compared with numMismatchedBases using cmp
+
+// PbiQueryEndFilter
+
+inline PbiQueryEndFilter::PbiQueryEndFilter(const int32_t position, const Compare::Type cmp)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::Q_END>(position, cmp)
+{ } // Q_END (qEnd_) of each row is compared with position using cmp
+
+// PbiQueryLengthFilter
+
+inline PbiQueryLengthFilter::PbiQueryLengthFilter(const int32_t length, const Compare::Type cmp)
+ : internal::FilterBase<int32_t>(length, cmp) // only stores the threshold & comparison
+{ } // this filter's Accepts() is not defined in this .inl
+
+// PbiQueryStartFilter
+
+inline PbiQueryStartFilter::PbiQueryStartFilter(const int32_t position, const Compare::Type cmp)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::Q_START>(position, cmp)
+{ } // Q_START (qStart_) of each row is compared with position using cmp
+
+// PbiReadAccuracyFilter
+
+inline PbiReadAccuracyFilter::PbiReadAccuracyFilter(const Accuracy accuracy, const Compare::Type cmp)
+ : internal::BasicDataFilterBase<Accuracy, BasicLookupData::READ_QUALITY>(accuracy, cmp)
+{ } // READ_QUALITY (readQual_) of each row is compared with accuracy using cmp
+
+// PbiReadGroupFilter
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const int32_t rgId, const Compare::Type cmp)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(rgId, cmp)
+{ } // numeric ID: RG_ID (rgId_) of each row is compared with rgId using cmp
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::string rgId, const Compare::Type cmp)
+ : PbiReadGroupFilter(ReadGroupInfo::IdToInt(rgId), cmp) // string ID is converted via IdToInt, then delegates
+{ }
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const ReadGroupInfo& rg, const Compare::Type cmp)
+ : PbiReadGroupFilter(rg.Id(), cmp) // delegates using the read group's Id()
+{ }
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<int32_t>& whitelist)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(whitelist)
+{ } // whitelist of numeric read group IDs (copied)
+
+inline PbiReadGroupFilter::PbiReadGroupFilter(std::vector<int32_t>&& whitelist)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(std::move(whitelist))
+{ } // whitelist of numeric read group IDs (moved)
+
+/// Whitelist of read group ID strings: the base is started with an empty
+/// numeric whitelist, which is then filled with ReadGroupInfo::IdToInt() of
+/// every entry.
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<std::string>& whitelist)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(std::vector<int32_t>())
+{
+    std::vector<int32_t>& numericIds = multiValue_.get();
+    numericIds.reserve(whitelist.size());
+    for (const std::string& idString : whitelist)
+        numericIds.push_back(ReadGroupInfo::IdToInt(idString));
+}
+
+/// Rvalue overload of the ID-string whitelist constructor. The strings are
+/// only read for conversion, so the result matches the const& overload.
+inline PbiReadGroupFilter::PbiReadGroupFilter(std::vector<std::string>&& whitelist)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(std::vector<int32_t>())
+{
+    std::vector<int32_t>& numericIds = multiValue_.get();
+    numericIds.reserve(whitelist.size());
+    for (std::string& idString : whitelist)
+        numericIds.push_back(ReadGroupInfo::IdToInt(idString));
+}
+
+/// Whitelist of read groups: the base is started with an empty numeric
+/// whitelist, which is then filled with ReadGroupInfo::IdToInt() of every
+/// entry's Id().
+inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(std::vector<int32_t>())
+{
+    std::vector<int32_t>& numericIds = multiValue_.get();
+    numericIds.reserve(whitelist.size());
+    for (const ReadGroupInfo& group : whitelist)
+        numericIds.push_back(ReadGroupInfo::IdToInt(group.Id()));
+}
+
+/// Rvalue overload of the read group whitelist constructor. The entries are
+/// only read for conversion, so the result matches the const& overload.
+inline PbiReadGroupFilter::PbiReadGroupFilter(std::vector<ReadGroupInfo>&& whitelist)
+    : internal::BasicDataFilterBase<int32_t, BasicLookupData::RG_ID>(std::vector<int32_t>())
+{
+    std::vector<int32_t>& numericIds = multiValue_.get();
+    numericIds.reserve(whitelist.size());
+    for (ReadGroupInfo& group : whitelist)
+        numericIds.push_back(ReadGroupInfo::IdToInt(group.Id()));
+}
+
+// PbiReferenceEndFilter
+
+inline PbiReferenceEndFilter::PbiReferenceEndFilter(const uint32_t tEnd, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<uint32_t, MappedLookupData::T_END>(tEnd, cmp)
+{ } // T_END (tEnd_) of each row is compared with tEnd using cmp
+
+// PbiReferenceIdFilter
+
+inline PbiReferenceIdFilter::PbiReferenceIdFilter(const int32_t tId, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<int32_t, MappedLookupData::T_ID>(tId, cmp)
+{ } // single value: T_ID (tId_) of each row is compared with tId using cmp
+
+inline PbiReferenceIdFilter::PbiReferenceIdFilter(const std::vector<int32_t>& whitelist)
+ : internal::MappedDataFilterBase<int32_t, MappedLookupData::T_ID>(whitelist)
+{ } // whitelist mode (copied)
+
+inline PbiReferenceIdFilter::PbiReferenceIdFilter(std::vector<int32_t>&& whitelist)
+ : internal::MappedDataFilterBase<int32_t, MappedLookupData::T_ID>(std::move(whitelist))
+{ } // whitelist mode (moved)
+
+// PbiReferenceStartFilter
+
+inline PbiReferenceStartFilter::PbiReferenceStartFilter(const uint32_t tStart, const Compare::Type cmp)
+ : internal::MappedDataFilterBase<uint32_t, MappedLookupData::T_START>(tStart, cmp)
+{ } // T_START (tStart_) of each row is compared with tStart using cmp
+
+// PbiZmwFilter
+
+inline PbiZmwFilter::PbiZmwFilter(const int32_t zmw, const Compare::Type cmp)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::ZMW>(zmw, cmp)
+{ } // single value: ZMW (holeNumber_) of each row is compared with zmw using cmp
+
+inline PbiZmwFilter::PbiZmwFilter(const std::vector<int32_t>& whitelist)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::ZMW>(whitelist)
+{ } // whitelist mode (copied)
+
+inline PbiZmwFilter::PbiZmwFilter(std::vector<int32_t>&& whitelist)
+ : internal::BasicDataFilterBase<int32_t, BasicLookupData::ZMW>(std::move(whitelist))
+{ } // whitelist mode (moved)
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiIndex.inl
+/// \brief Inline implementations for the PbiIndex class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamRecord.h"
+#include "pbbam/PbiFile.h"
+#include "pbbam/PbiIndex.h"
+#include "pbbam/PbiRawData.h"
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include <cassert>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// --------------------------
+// Pbi Lookup Aggregate
+// --------------------------
+
+/// \internal
+///
+/// Implementation data for PbiIndex: file-level metadata (name, version,
+/// section flags, read count) plus the per-section lookup structures.
+class PbiIndexPrivate
+{
+public:
+    PbiIndexPrivate(void);
+    PbiIndexPrivate(const PbiRawData& rawIndex);
+    PbiIndexPrivate(PbiRawData&& rawIndex);
+
+    /// \returns an independently owned copy of this object
+    std::unique_ptr<PbiIndexPrivate> DeepCopy(void) const;
+
+public:
+    /// \returns true if the bit for \p flag is set in sections_
+    bool HasSection(const PbiFile::Section flag) const;
+
+    /// Sets (ok == true) or clears (ok == false) the bit for \p flag in sections_.
+    void SetSection(const PbiFile::Section flag, bool ok = true);
+
+public:
+    /// \returns the result block(s) for reference \p tId; empty if there is no
+    ///          reference section or no index range for \p tId
+    IndexResultBlocks LookupReference(const int32_t tId) const;
+
+private:
+    /// Merges \p indices into blocks and applies the basic-data offsets to them.
+    IndexResultBlocks MergeBlocksWithOffsets(const IndexList& indices) const;
+
+public:
+    std::string filename_;
+    PbiFile::VersionEnum version_;
+    PbiFile::Sections sections_;
+    uint32_t numReads_;
+
+    // lookup structures
+    BasicLookupData     basicData_;
+    MappedLookupData    mappedData_;
+    ReferenceLookupData referenceData_;
+    BarcodeLookupData   barcodeData_;
+
+private:
+    // non-copyable: explicitly deleted so that an accidental copy is rejected
+    // at compile time (rather than at link time, as with the former
+    // declared-but-undefined members); use DeepCopy() instead
+    PbiIndexPrivate(const PbiIndexPrivate& other) = delete;
+    PbiIndexPrivate& operator=(const PbiIndexPrivate& other) = delete;
+};
+
+inline bool PbiIndexPrivate::HasSection(const PbiFile::Section flag) const // is the section's bit set?
+{ return (sections_ & flag) != 0; } // sections_ is used as a bit mask of PbiFile::Section flags
+
+/// Sets the bit for \p flag in sections_ when \p ok is true, clears it otherwise.
+inline void PbiIndexPrivate::SetSection(const PbiFile::Section flag, bool ok)
+{
+    if (!ok) {
+        sections_ &= ~flag;   // clear
+        return;
+    }
+    sections_ |= flag;        // set
+}
+
+/// Looks up the records mapped to reference \p tId.
+///
+/// \returns an empty result if the index has no reference section or if both
+///          ends of the index range for \p tId are the null index; otherwise a
+///          single block covering that range, with offsets applied from the
+///          basic data
+inline IndexResultBlocks
+PbiIndexPrivate::LookupReference(const int32_t tId) const
+{
+    // without a reference section there is nothing to look up
+    if (!HasSection(PbiFile::REFERENCE))
+        return IndexResultBlocks{ };
+
+    const auto& rowRange = referenceData_.Indices(tId);
+    const bool isNullRange = (rowRange.first == nullIndex()) &&
+                             (rowRange.second == nullIndex());
+    if (isNullRange)
+        return IndexResultBlocks{ };
+
+    const auto rowCount = rowRange.second - rowRange.first;
+    auto result = IndexResultBlocks{ IndexResultBlock(rowRange.first, rowCount) };
+    basicData_.ApplyOffsets(result);
+    return result;
+}
+
+/// \brief Merges \p indices into blocks (via mergedIndexBlocks) and then lets
+///        basicData_ fill in each block's file offset.
+inline IndexResultBlocks
+PbiIndexPrivate::MergeBlocksWithOffsets(const IndexList& indices) const
+{
+    IndexResultBlocks merged = mergedIndexBlocks(indices);
+    basicData_.ApplyOffsets(merged);
+    return merged;
+}
+
+} // namespace internal
+
+/// \returns the section flags held by the private data
+inline PbiFile::Sections PbiIndex::FileSections() const
+{
+    return d_->sections_;
+}
+
+/// \returns true if the BARCODE section flag is set
+inline bool PbiIndex::HasBarcodeData() const
+{
+    return d_->HasSection(PbiFile::BARCODE);
+}
+
+/// \returns true if the MAPPED section flag is set
+inline bool PbiIndex::HasMappedData() const
+{
+    return d_->HasSection(PbiFile::MAPPED);
+}
+
+/// \returns true if the REFERENCE section flag is set
+inline bool PbiIndex::HasReferenceData() const
+{
+    return d_->HasSection(PbiFile::REFERENCE);
+}
+
+/// \returns true if the flag for \p section is set
+inline bool PbiIndex::HasSection(const PbiFile::Section section) const
+{
+    return d_->HasSection(section);
+}
+
+/// \returns the read count held by the private data
+inline uint32_t PbiIndex::NumReads() const
+{
+    return d_->numReads_;
+}
+
+/// \returns the PBI version held by the private data
+inline PbiFile::VersionEnum PbiIndex::Version() const
+{
+    return d_->version_;
+}
+
+/// \returns read-only reference to the barcode lookup structure
+inline const BarcodeLookupData& PbiIndex::BarcodeData() const
+{
+    return d_->barcodeData_;
+}
+
+/// \returns read-only reference to the basic lookup structure
+inline const BasicLookupData& PbiIndex::BasicData() const
+{
+    return d_->basicData_;
+}
+
+/// \returns read-only reference to the mapped lookup structure
+inline const MappedLookupData& PbiIndex::MappedData() const
+{
+    return d_->mappedData_;
+}
+
+/// \returns read-only reference to the reference lookup structure
+inline const ReferenceLookupData& PbiIndex::ReferenceData() const
+{
+    return d_->referenceData_;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiLookupData.inl
+/// \brief Inline implementations for the classes used for PBI data lookup.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiLookupData.h"
+#include "pbbam/PbiRawData.h"
+#include "pbbam/Strand.h"
+#include <algorithm>
+#include <unordered_set>
+#include <cassert>
+
+namespace PacBio {
+namespace BAM {
+
+// ----------------
+// helper methods
+// ----------------
+
+/// \brief Collapses a list of indices into blocks of consecutive indices.
+///
+/// The input is sorted and de-duplicated in place. Each run of values that
+/// increase by exactly one becomes a single IndexResultBlock whose first
+/// index is the start of the run and whose numReads_ is the run length.
+///
+/// \param[in] indices  indices to merge (contents are modified)
+/// \returns the merged blocks, in ascending index order; empty for empty input
+inline IndexResultBlocks mergedIndexBlocks(IndexList&& indices)
+{
+    if (indices.empty())
+        return IndexResultBlocks{ };
+
+    std::sort(indices.begin(), indices.end());
+    const auto uniqueEnd = std::unique(indices.begin(), indices.end());
+
+    // Walk with iterators rather than an int counter, so that lists longer
+    // than INT_MAX cannot overflow the loop variable.
+    auto iter = indices.begin();
+    auto previous = *iter;
+    auto result = IndexResultBlocks{ IndexResultBlock(previous, 1) };
+    for (++iter; iter != uniqueEnd; ++iter) {
+        const auto current = *iter;
+        if (current == previous + 1)
+            ++result.back().numReads_;                      // extends current run
+        else
+            result.push_back(IndexResultBlock(current, 1)); // starts a new run
+        previous = current;
+    }
+    return result;
+}
+
+/// \brief Overload for callers that need their index list left untouched.
+///
+/// The rvalue overload reorders its argument, so it is handed a private copy.
+inline IndexResultBlocks mergedIndexBlocks(const IndexList& indices)
+{
+    IndexList scratch(indices);
+    return mergedIndexBlocks(std::move(scratch));
+}
+
+/// \returns the sentinel used for "no index": -1 converted to size_t
+inline size_t nullIndex()
+{
+    const size_t sentinel = static_cast<size_t>(-1);
+    return sentinel;
+}
+
+/// \brief Appends every element of \p toAppend to the end of \p result.
+///
+/// A single range insert is used instead of an exact-size reserve() followed
+/// by push_back(): when this helper is called repeatedly on the same result,
+/// reserving the exact size each time makes capacity grow linearly and
+/// forces a reallocation on every call.
+///
+/// \param[in,out] result    list that receives the elements
+/// \param[in]     toAppend  elements to add; must not be the same object as
+///                          \p result
+inline void pushBackIndices(IndexList& result,
+                            const IndexList& toAppend)
+{
+    result.insert(result.end(), toAppend.begin(), toAppend.end());
+}
+
+// -----------------
+// OrderedLookup
+// -----------------
+
+/// \brief Creates an empty lookup.
+template<typename T>
+inline OrderedLookup<T>::OrderedLookup()
+{ }
+
+/// \brief Creates a lookup holding a copy of an already-built container.
+template<typename T>
+inline OrderedLookup<T>::OrderedLookup(const container_type& data)
+    : data_(data)
+{ }
+
+/// \brief Creates a lookup that takes over an already-built container.
+template<typename T>
+inline OrderedLookup<T>::OrderedLookup(container_type&& data)
+    : data_(std::move(data))
+{ }
+
+/// \brief Builds the lookup from raw per-row values.
+///
+/// For each position i in \p rawData, i is appended to the index list stored
+/// under the key rawData[i].
+template<typename T>
+inline OrderedLookup<T>::OrderedLookup(const std::vector<T>& rawData)
+{
+    size_t row = 0;
+    for (const auto& value : rawData) {
+        data_[value].push_back(row);
+        ++row;
+    }
+}
+
+/// \brief Rvalue overload; builds the lookup exactly like the const-reference
+///        overload (the values themselves are only read).
+template<typename T>
+inline OrderedLookup<T>::OrderedLookup(std::vector<T>&& rawData)
+{
+    size_t row = 0;
+    for (const auto& value : rawData) {
+        data_[value].push_back(row);
+        ++row;
+    }
+}
+
+/// \returns true if both lookups hold equal underlying containers
+template<typename T>
+inline bool OrderedLookup<T>::operator==(const OrderedLookup<T>& other) const
+{
+    return data_ == other.data_;
+}
+
+/// \returns the negation of operator==
+template<typename T>
+inline bool OrderedLookup<T>::operator!=(const OrderedLookup<T>& other) const
+{
+    const bool same = (*this == other);
+    return !same;
+}
+
+/// \returns mutable iterator to the first entry of the underlying container
+template<typename T>
+inline typename OrderedLookup<T>::iterator OrderedLookup<T>::begin()
+{
+    return data_.begin();
+}
+
+/// \returns const iterator to the first entry of the underlying container
+template<typename T>
+inline typename OrderedLookup<T>::const_iterator OrderedLookup<T>::begin() const
+{
+    return data_.cbegin();
+}
+
+/// \returns const iterator to the first entry of the underlying container
+template<typename T>
+inline typename OrderedLookup<T>::const_iterator OrderedLookup<T>::cbegin() const
+{
+    return data_.cbegin();
+}
+
+/// \returns mutable past-the-end iterator of the underlying container
+template<typename T>
+inline typename OrderedLookup<T>::iterator OrderedLookup<T>::end()
+{
+    return data_.end();
+}
+
+/// \returns const past-the-end iterator of the underlying container
+template<typename T>
+inline typename OrderedLookup<T>::const_iterator OrderedLookup<T>::end() const
+{
+    return data_.cend();
+}
+
+/// \returns const past-the-end iterator of the underlying container
+template<typename T>
+inline typename OrderedLookup<T>::const_iterator OrderedLookup<T>::cend() const
+{
+    return data_.cend();
+}
+
+/// \returns true if the underlying container has no keys
+template<typename T>
+inline bool OrderedLookup<T>::empty() const
+{
+    return data_.empty();
+}
+
+/// \returns the number of entries in the underlying container
+template<typename T>
+inline size_t OrderedLookup<T>::size() const
+{
+    return data_.size();
+}
+
+/// \brief Gathers the index lists of every entry in [begin, end).
+/// \returns all gathered indices, sorted ascending
+template<typename T>
+inline IndexList
+OrderedLookup<T>::LookupInclusiveRange(const const_iterator &begin,
+                                       const const_iterator &end) const
+{
+    IndexList collected;
+    auto current = begin;
+    while (current != end) {
+        pushBackIndices(collected, current->second);
+        ++current;
+    }
+    std::sort(collected.begin(), collected.end());
+    return collected;
+}
+
+/// \brief Gathers the index lists of every entry in [begin, end) whose key
+///        differs from \p key.
+/// \returns all gathered indices, sorted ascending
+template<typename T>
+inline IndexList
+OrderedLookup<T>::LookupExclusiveRange(const const_iterator& begin,
+                                       const const_iterator& end,
+                                       const key_type& key) const
+{
+    IndexList collected;
+    auto current = begin;
+    while (current != end) {
+        const bool keep = (current->first != key);
+        if (keep)
+            pushBackIndices(collected, current->second);
+        ++current;
+    }
+    std::sort(collected.begin(), collected.end());
+    return collected;
+}
+
+/// \brief Returns the indices whose stored key relates to \p key as
+///        requested by \p compare.
+///
+/// EQUAL returns the list stored under \p key (empty if absent). The other
+/// comparisons select a range with lower_bound/upper_bound and collect it,
+/// leaving out \p key itself for the strict and NOT_EQUAL comparisons. An
+/// unrecognized compare type asserts and yields an empty list.
+template<typename T>
+inline IndexList
+OrderedLookup<T>::LookupIndices(const OrderedLookup::key_type& key,
+                                const Compare::Type& compare) const
+{
+    const auto first = data_.cbegin();
+    const auto last  = data_.cend();
+
+    switch (compare) {
+
+        case Compare::EQUAL:
+        {
+            const auto match = data_.find(key);
+            if (match == last)
+                return IndexList();
+            return match->second;
+        }
+
+        case Compare::LESS_THAN:
+            return LookupExclusiveRange(first, data_.upper_bound(key), key);
+
+        case Compare::LESS_THAN_EQUAL:
+            return LookupInclusiveRange(first, data_.upper_bound(key));
+
+        case Compare::GREATER_THAN:
+            return LookupExclusiveRange(data_.lower_bound(key), last, key);
+
+        case Compare::GREATER_THAN_EQUAL:
+            return LookupInclusiveRange(data_.lower_bound(key), last);
+
+        case Compare::NOT_EQUAL:
+            return LookupExclusiveRange(first, last, key);
+
+        default:
+            assert(false);
+    }
+    return IndexList{ };
+}
+
+/// \brief Rebuilds the flat per-row vector from the lookup.
+///
+/// Each key is written to every position listed under it; the result is
+/// grown on demand so that the largest index seen fits.
+template<typename T>
+inline std::vector<T> OrderedLookup<T>::Unpack() const
+{
+    std::vector<T> unpacked;
+    for (auto entry = cbegin(); entry != cend(); ++entry) {
+        for (const auto row : entry->second) {
+            if (row >= unpacked.size())
+                unpacked.resize(row + 1);
+            unpacked[row] = entry->first;
+        }
+    }
+    return unpacked;
+}
+
+// -----------------
+// UnorderedLookup
+// -----------------
+
+/// \brief Creates an empty lookup.
+template<typename T>
+inline UnorderedLookup<T>::UnorderedLookup()
+{ }
+
+/// \brief Creates a lookup holding a copy of an already-built container.
+template<typename T>
+inline UnorderedLookup<T>::UnorderedLookup(const container_type& data)
+    : data_(data)
+{ }
+
+/// \brief Creates a lookup that takes over an already-built container.
+template<typename T>
+inline UnorderedLookup<T>::UnorderedLookup(container_type&& data)
+    : data_(std::move(data))
+{ }
+
+/// \brief Builds the lookup from raw per-row values.
+///
+/// For each position i in \p rawData, i is appended to the index list stored
+/// under the key rawData[i].
+template<typename T>
+inline UnorderedLookup<T>::UnorderedLookup(const std::vector<T>& rawData)
+{
+    size_t row = 0;
+    for (const auto& value : rawData) {
+        data_[value].push_back(row);
+        ++row;
+    }
+}
+
+/// \brief Rvalue overload; builds the lookup exactly like the const-reference
+///        overload (the values themselves are only read).
+template<typename T>
+inline UnorderedLookup<T>::UnorderedLookup(std::vector<T>&& rawData)
+{
+    size_t row = 0;
+    for (const auto& value : rawData) {
+        data_[value].push_back(row);
+        ++row;
+    }
+}
+
+/// \returns true if both lookups hold equal underlying containers
+template<typename T>
+inline bool UnorderedLookup<T>::operator==(const UnorderedLookup<T>& other) const
+{
+    return data_ == other.data_;
+}
+
+/// \returns the negation of operator==
+template<typename T>
+inline bool UnorderedLookup<T>::operator!=(const UnorderedLookup<T>& other) const
+{
+    const bool same = (*this == other);
+    return !same;
+}
+
+/// \returns mutable iterator to the first entry of the underlying container
+template<typename T>
+inline typename UnorderedLookup<T>::iterator UnorderedLookup<T>::begin()
+{
+    return data_.begin();
+}
+
+/// \returns const iterator to the first entry of the underlying container
+template<typename T>
+inline typename UnorderedLookup<T>::const_iterator UnorderedLookup<T>::begin() const
+{
+    return data_.cbegin();
+}
+
+/// \returns const iterator to the first entry of the underlying container
+template<typename T>
+inline typename UnorderedLookup<T>::const_iterator UnorderedLookup<T>::cbegin() const
+{
+    return data_.cbegin();
+}
+
+/// \returns mutable past-the-end iterator of the underlying container
+template<typename T>
+inline typename UnorderedLookup<T>::iterator UnorderedLookup<T>::end()
+{
+    return data_.end();
+}
+
+/// \returns const past-the-end iterator of the underlying container
+template<typename T>
+inline typename UnorderedLookup<T>::const_iterator UnorderedLookup<T>::end() const
+{
+    return data_.cend();
+}
+
+/// \returns const past-the-end iterator of the underlying container
+template<typename T>
+inline typename UnorderedLookup<T>::const_iterator UnorderedLookup<T>::cend() const
+{
+    return data_.cend();
+}
+
+/// \returns true if the underlying container has no keys
+template<typename T>
+inline bool UnorderedLookup<T>::empty() const
+{
+    return data_.empty();
+}
+
+/// \returns the number of entries in the underlying container
+template<typename T>
+inline size_t UnorderedLookup<T>::size() const
+{
+    return data_.size();
+}
+
+/// \brief Scans every entry and gathers the index lists of those whose key
+///        satisfies cmp(storedKey, key).
+/// \returns all gathered indices, sorted ascending
+template<typename T>
+template<typename Compare>
+inline IndexList
+UnorderedLookup<T>::LookupHelper(const UnorderedLookup::key_type& key,
+                                 const Compare& cmp) const
+{
+    IndexList matches; // init with some avg size ??
+    auto entry = data_.cbegin();
+    const auto stop = data_.cend();
+    while (entry != stop) {
+        const auto storedKey = entry->first;
+        if (cmp(storedKey, key))
+            pushBackIndices(matches, entry->second);
+        ++entry;
+    }
+    std::sort(matches.begin(), matches.end());
+    return matches;
+}
+
+/// \brief Returns the indices whose stored key relates to \p key as
+///        requested by \p compare.
+///
+/// EQUAL is a direct find on the container (empty list if the key is
+/// absent). Every other comparison goes through LookupHelper with the
+/// matching standard function object. An unrecognized compare type asserts
+/// and yields an empty list.
+template<typename T>
+inline IndexList
+UnorderedLookup<T>::LookupIndices(const UnorderedLookup::key_type& key,
+                                  const Compare::Type& compare) const
+{
+    switch (compare) {
+
+        case Compare::EQUAL:
+        {
+            const auto match = data_.find(key);
+            if (match == data_.cend())
+                return IndexList();
+            return match->second;
+        }
+
+        case Compare::LESS_THAN:
+            return LookupHelper(key, std::less<key_type>());
+
+        case Compare::LESS_THAN_EQUAL:
+            return LookupHelper(key, std::less_equal<key_type>());
+
+        case Compare::GREATER_THAN:
+            return LookupHelper(key, std::greater<key_type>());
+
+        case Compare::GREATER_THAN_EQUAL:
+            return LookupHelper(key, std::greater_equal<key_type>());
+
+        case Compare::NOT_EQUAL:
+            return LookupHelper(key, std::not_equal_to<key_type>());
+
+        default:
+            assert(false);
+    }
+    return IndexList{ };
+}
+
+/// \brief Rebuilds the flat per-row vector from the lookup.
+///
+/// Each key is written to every position listed under it; the result is
+/// grown on demand so that the largest index seen fits.
+template<typename T>
+inline std::vector<T> UnorderedLookup<T>::Unpack() const
+{
+    std::vector<T> unpacked;
+    for (auto entry = cbegin(); entry != cend(); ++entry) {
+        for (const auto row : entry->second) {
+            if (row >= unpacked.size())
+                unpacked.resize(row + 1);
+            unpacked[row] = entry->first;
+        }
+    }
+    return unpacked;
+}
+
+// -------------------
+// BasicLookupData
+// -------------------
+
+/// \brief Fills in the virtual file offset of every block.
+///
+/// Each block's virtualOffset_ is set to the fileOffset_ entry at the
+/// block's firstIndex_. The lookup is bounds-checked (vector::at).
+inline
+void BasicLookupData::ApplyOffsets(IndexResultBlocks& blocks) const
+{
+    for (auto& block : blocks) {
+        const auto row = block.firstIndex_;
+        block.virtualOffset_ = fileOffset_.at(row);
+    }
+}
+
+/// \brief Dispatches a lookup to the member lookup that backs \p field.
+///
+/// \param[in] field        which basic-data field to query
+/// \param[in] value        value to compare against
+/// \param[in] compareType  comparison to apply
+/// \returns the matching indices; VIRTUAL_OFFSET and unknown fields assert
+///          and yield an empty list
+template<typename T>
+inline IndexList BasicLookupData::Indices(const BasicLookupData::Field& field,
+                                          const T& value,
+                                          const Compare::Type& compareType) const
+{
+    switch (field) {
+        case BasicLookupData::RG_ID:
+            return rgId_.LookupIndices(value, compareType);
+        case BasicLookupData::Q_START:
+            return qStart_.LookupIndices(value, compareType);
+        case BasicLookupData::Q_END:
+            return qEnd_.LookupIndices(value, compareType);
+        case BasicLookupData::ZMW:
+            return holeNumber_.LookupIndices(value, compareType);
+        case BasicLookupData::READ_QUALITY:
+            return readQual_.LookupIndices(value, compareType);
+        case BasicLookupData::CONTEXT_FLAG:
+            return ctxtFlag_.LookupIndices(value, compareType);
+
+        // virtual offsets cannot be queried this way; handled like unknown
+        case BasicLookupData::VIRTUAL_OFFSET:
+        default:
+            assert(false);
+    }
+    return IndexList{ };
+}
+
+/// \brief Looks up several values of one field with an EQUAL comparison.
+///
+/// The per-value results are concatenated in the order of \p values; the
+/// combined list is not sorted or de-duplicated here.
+///
+/// \param[in] field   which basic-data field to query
+/// \param[in] values  values to match exactly
+/// \returns concatenation of the indices found for each value
+template<typename T>
+inline IndexList BasicLookupData::IndicesMulti(const BasicLookupData::Field& field,
+                                               const std::vector<T>& values) const
+{
+    auto result = IndexList{ };
+    for (const auto& value : values) {
+        const auto valueIndices = Indices(field, value, Compare::EQUAL);
+        // range insert, not an exact-size reserve() per iteration, which
+        // would force a reallocation every time round the loop
+        result.insert(result.end(), valueIndices.begin(), valueIndices.end());
+    }
+    return result;
+}
+
+/// \returns read-only reference to the stored file offsets
+inline const std::vector<int64_t>& BasicLookupData::VirtualFileOffsets() const
+{
+    return fileOffset_;
+}
+
+// -------------------
+// MappedLookupData
+// -------------------
+
+/// \brief Dispatches a lookup to the member lookup that backs \p field.
+///
+/// \param[in] field        which mapped-data field to query
+/// \param[in] value        value to compare against
+/// \param[in] compareType  comparison to apply
+/// \returns the matching indices; fields not listed below assert and yield
+///          an empty list
+template<typename T>
+inline IndexList MappedLookupData::Indices(const MappedLookupData::Field& field,
+                                           const T& value,
+                                           const Compare::Type& compareType) const
+{
+    switch (field) {
+        case MappedLookupData::T_ID:
+            return tId_.LookupIndices(value, compareType);
+        case MappedLookupData::T_START:
+            return tStart_.LookupIndices(value, compareType);
+        case MappedLookupData::T_END:
+            return tEnd_.LookupIndices(value, compareType);
+        case MappedLookupData::A_START:
+            return aStart_.LookupIndices(value, compareType);
+        case MappedLookupData::A_END:
+            return aEnd_.LookupIndices(value, compareType);
+        case MappedLookupData::N_M:
+            return nM_.LookupIndices(value, compareType);
+        case MappedLookupData::N_MM:
+            return nMM_.LookupIndices(value, compareType);
+        case MappedLookupData::N_DEL:
+            return nDel_.LookupIndices(value, compareType);
+        case MappedLookupData::N_INS:
+            return nIns_.LookupIndices(value, compareType);
+        case MappedLookupData::MAP_QUALITY:
+            return mapQV_.LookupIndices(value, compareType);
+
+        // STRAND is served by the Strand specialization of this template
+
+        default:
+            assert(false);
+    }
+    return IndexList{ };
+}
+
+/// \brief Strand specialization: answers from the stored forward/reverse
+///        index lists.
+///
+/// Only EQUAL and NOT_EQUAL are supported; any other comparison asserts and
+/// yields an empty list. \p field is expected to be STRAND (checked by
+/// assert only).
+template<>
+inline IndexList MappedLookupData::Indices(const MappedLookupData::Field& field,
+                                           const Strand& strand,
+                                           const Compare::Type& compareType) const
+{
+    assert(field == MappedLookupData::STRAND);
+    (void)field; // quash warnings building in release mode
+
+    const bool wantForward = (strand == Strand::FORWARD);
+
+    if (compareType == Compare::EQUAL) {
+        if (wantForward)
+            return forwardStrand_;
+        return reverseStrand_;
+    }
+
+    if (compareType == Compare::NOT_EQUAL) {
+        if (wantForward)
+            return reverseStrand_;
+        return forwardStrand_;
+    }
+
+    // only EQUAL/NOT_EQUAL supported
+    assert(false);
+    return IndexList{ };
+}
+
+/// \brief Looks up several values of one field with an EQUAL comparison.
+///
+/// The per-value results are concatenated in the order of \p values; the
+/// combined list is not sorted or de-duplicated here.
+///
+/// \param[in] field   which mapped-data field to query
+/// \param[in] values  values to match exactly
+/// \returns concatenation of the indices found for each value
+template<typename T>
+inline IndexList MappedLookupData::IndicesMulti(const MappedLookupData::Field& field,
+                                                const std::vector<T>& values) const
+{
+    auto result = IndexList{ };
+    for (const auto& value : values) {
+        const auto valueIndices = Indices(field, value, Compare::EQUAL);
+        // range insert, not an exact-size reserve() per iteration, which
+        // would force a reallocation every time round the loop
+        result.insert(result.end(), valueIndices.begin(), valueIndices.end());
+    }
+    return result;
+}
+
+
+// ---------------------
+// ReferenceLookupData
+// ---------------------
+
+/// \brief Fetches the index range stored for reference ID \p tId.
+/// \returns the stored range, or { nullIndex(), nullIndex() } if \p tId has
+///          no entry
+inline IndexRange ReferenceLookupData::Indices(const int32_t tId) const
+{
+    const auto entry = references_.find(tId);
+    if (entry != references_.cend())
+        return entry->second;
+    return IndexRange{ nullIndex(), nullIndex() };
+}
+
+// -------------------
+// BarcodeLookupData
+// -------------------
+
+/// \brief Dispatches a lookup to the member lookup that backs \p field.
+///
+/// \param[in] field        which barcode-data field to query
+/// \param[in] value        value to compare against
+/// \param[in] compareType  comparison to apply
+/// \returns the matching indices; unknown fields assert and yield an empty
+///          list
+template<typename T>
+inline IndexList BarcodeLookupData::Indices(const BarcodeLookupData::Field &field,
+                                            const T& value,
+                                            const Compare::Type &compareType) const
+{
+    switch (field) {
+        case BarcodeLookupData::BC_FORWARD:
+            return bcForward_.LookupIndices(value, compareType);
+        case BarcodeLookupData::BC_REVERSE:
+            return bcReverse_.LookupIndices(value, compareType);
+        case BarcodeLookupData::BC_QUALITY:
+            return bcQual_.LookupIndices(value, compareType);
+        default:
+            assert(false);
+    }
+    return IndexList{ };
+}
+
+/// \brief Looks up several values of one field with an EQUAL comparison.
+///
+/// The per-value results are concatenated in the order of \p values; the
+/// combined list is not sorted or de-duplicated here.
+///
+/// \param[in] field   which barcode-data field to query
+/// \param[in] values  values to match exactly
+/// \returns concatenation of the indices found for each value
+template<typename T>
+inline IndexList BarcodeLookupData::IndicesMulti(const BarcodeLookupData::Field &field,
+                                                 const std::vector<T>& values) const
+{
+    auto result = IndexList{ };
+    for (const auto& value : values) {
+        const auto valueIndices = Indices(field, value, Compare::EQUAL);
+        // range insert, not an exact-size reserve() per iteration, which
+        // would force a reallocation every time round the loop
+        result.insert(result.end(), valueIndices.begin(), valueIndices.end());
+    }
+    return result;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiRawData.inl
+/// \brief Inline implementations for the classes used for working with raw PBI
+/// data.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiRawData.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \returns read-only reference to the raw barcode data
+inline const PbiRawBarcodeData& PbiRawData::BarcodeData() const
+{
+    return barcodeData_;
+}
+
+/// \returns mutable reference to the raw barcode data
+inline PbiRawBarcodeData& PbiRawData::BarcodeData()
+{
+    return barcodeData_;
+}
+
+/// \returns read-only reference to the raw basic data
+inline const PbiRawBasicData& PbiRawData::BasicData() const
+{
+    return basicData_;
+}
+
+/// \returns mutable reference to the raw basic data
+inline PbiRawBasicData& PbiRawData::BasicData()
+{
+    return basicData_;
+}
+
+/// \returns a copy of the stored filename
+inline std::string PbiRawData::Filename() const
+{
+    return filename_;
+}
+
+/// \returns the stored section flags
+inline PbiFile::Sections PbiRawData::FileSections() const
+{
+    return sections_;
+}
+
+/// \brief Replaces the stored section flags.
+/// \returns reference to this object, so calls can be chained
+inline PbiRawData& PbiRawData::FileSections(PbiFile::Sections sections)
+{
+    sections_ = sections;
+    return *this;
+}
+
+/// \returns true if the BARCODE section flag is set
+inline bool PbiRawData::HasBarcodeData() const
+{
+    return HasSection(PbiFile::BARCODE);
+}
+
+/// \returns true if the MAPPED section flag is set
+inline bool PbiRawData::HasMappedData() const
+{
+    return HasSection(PbiFile::MAPPED);
+}
+
+/// \returns true if the REFERENCE section flag is set
+inline bool PbiRawData::HasReferenceData() const
+{
+    return HasSection(PbiFile::REFERENCE);
+}
+
+/// \returns true if the bitwise AND of the stored flags and \p section is
+///          non-zero
+inline bool PbiRawData::HasSection(const PbiFile::Section section) const
+{
+    const bool isSet = ((sections_ & section) != 0);
+    return isSet;
+}
+
+/// \returns the stored read count
+inline uint32_t PbiRawData::NumReads() const
+{
+    return numReads_;
+}
+
+/// \brief Replaces the stored read count.
+/// \returns reference to this object, so calls can be chained
+inline PbiRawData& PbiRawData::NumReads(uint32_t num)
+{
+    numReads_ = num;
+    return *this;
+}
+
+/// \returns read-only reference to the raw mapped data
+inline const PbiRawMappedData& PbiRawData::MappedData() const
+{
+    return mappedData_;
+}
+
+/// \returns mutable reference to the raw mapped data
+inline PbiRawMappedData& PbiRawData::MappedData()
+{
+    return mappedData_;
+}
+
+/// \returns read-only reference to the raw reference data
+inline const PbiRawReferenceData& PbiRawData::ReferenceData() const
+{
+    return referenceData_;
+}
+
+/// \returns mutable reference to the raw reference data
+inline PbiRawReferenceData& PbiRawData::ReferenceData()
+{
+    return referenceData_;
+}
+
+/// \returns the stored PBI version
+inline PbiFile::VersionEnum PbiRawData::Version() const
+{
+    return version_;
+}
+
+/// \brief Replaces the stored PBI version.
+/// \returns reference to this object, so calls can be chained
+inline PbiRawData& PbiRawData::Version(PbiFile::VersionEnum version)
+{
+    version_ = version;
+    return *this;
+}
+
+/// \returns true if tId_, beginRow_ and endRow_ all match those of \p other
+inline bool PbiReferenceEntry::operator==(const PbiReferenceEntry& other) const
+{
+    const bool sameId    = (tId_ == other.tId_);
+    const bool sameBegin = (beginRow_ == other.beginRow_);
+    const bool sameEnd   = (endRow_ == other.endRow_);
+    return sameId && sameBegin && sameEnd;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ProgramInfo.inl
+/// \brief Inline implementations for the ProgramInfo class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/ProgramInfo.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \returns a copy of the stored command line
+inline std::string ProgramInfo::CommandLine() const
+{
+    return commandLine_;
+}
+
+/// \brief Replaces the stored command line.
+/// \returns reference to this object, so calls can be chained
+inline ProgramInfo& ProgramInfo::CommandLine(const std::string& cmd)
+{
+    commandLine_ = cmd;
+    return *this;
+}
+
+/// \returns a copy of the stored custom tag map
+inline std::map<std::string, std::string> ProgramInfo::CustomTags() const
+{
+    return custom_;
+}
+
+/// \brief Replaces the stored custom tag map.
+/// \returns reference to this object, so calls can be chained
+inline ProgramInfo& ProgramInfo::CustomTags(const std::map<std::string,
+                                                           std::string>& custom)
+{
+    custom_ = custom;
+    return *this;
+}
+
+/// \returns a copy of the stored description
+inline std::string ProgramInfo::Description() const
+{
+    return description_;
+}
+
+/// \brief Replaces the stored description.
+/// \returns reference to this object, so calls can be chained
+inline ProgramInfo& ProgramInfo::Description(const std::string& description)
+{
+    description_ = description;
+    return *this;
+}
+
+/// \returns a copy of the stored program ID
+inline std::string ProgramInfo::Id() const
+{
+    return id_;
+}
+
+/// \brief Replaces the stored program ID.
+/// \returns reference to this object, so calls can be chained
+inline ProgramInfo& ProgramInfo::Id(const std::string& id)
+{
+    id_ = id;
+    return *this;
+}
+
+/// \returns true if the stored program ID is non-empty
+inline bool ProgramInfo::IsValid() const
+{
+    const bool missingId = id_.empty();
+    return !missingId;
+}
+
+/// \returns a copy of the stored program name
+inline std::string ProgramInfo::Name() const
+{
+    return name_;
+}
+
+/// \brief Replaces the stored program name.
+/// \returns reference to this object, so calls can be chained
+inline ProgramInfo& ProgramInfo::Name(const std::string& name)
+{
+    name_ = name;
+    return *this;
+}
+
+/// \returns a copy of the stored previous-program ID
+inline std::string ProgramInfo::PreviousProgramId() const
+{
+    return previousProgramId_;
+}
+
+/// \brief Replaces the stored previous-program ID.
+/// \returns reference to this object, so calls can be chained
+inline ProgramInfo& ProgramInfo::PreviousProgramId(const std::string& id)
+{
+    previousProgramId_ = id;
+    return *this;
+}
+
+/// \brief Convenience form that forwards to the ToSam() member of \p prog.
+inline std::string ProgramInfo::ToSam(const ProgramInfo& prog)
+{
+    return prog.ToSam();
+}
+
+/// \returns a copy of the stored version string
+inline std::string ProgramInfo::Version() const
+{
+    return version_;
+}
+
+/// \brief Replaces the stored version string.
+/// \returns reference to this object, so calls can be chained
+inline ProgramInfo& ProgramInfo::Version(const std::string& version)
+{
+    version_ = version;
+    return *this;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file QualityValue.inl
+/// \brief Inline implementations for the QualityValue class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/QualityValue.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Creates a quality value from a raw number.
+///
+/// Values above QualityValue::MAX are clamped down to QualityValue::MAX.
+inline QualityValue::QualityValue(const uint8_t value)
+    : value_(value)
+{
+    const bool tooLarge = (value_ > QualityValue::MAX);
+    if (tooLarge)
+        value_ = QualityValue::MAX;
+}
+
+/// \brief Copy constructor; duplicates the stored value of \p other.
+inline QualityValue::QualityValue(const QualityValue& other)
+    : value_(other.value_)
+{ }
+
+/// \brief Destructor; nothing to release.
+inline QualityValue::~QualityValue()
+{ }
+
+/// \returns the stored value plus 33, as a char
+inline char QualityValue::Fastq() const
+{
+    const auto shifted = value_ + 33;
+    return static_cast<char>(shifted);
+}
+
+/// \returns the stored numeric value
+inline QualityValue::operator uint8_t() const
+{
+    return value_;
+}
+
+/// \brief Creates a quality value from a FASTQ character.
+///
+/// 33 is subtracted from \p c and the result is narrowed to uint8_t before
+/// being passed to the constructor.
+inline QualityValue QualityValue::FromFastq(const char c)
+{
+    const auto raw = static_cast<uint8_t>(c - 33);
+    return QualityValue(raw);
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file QualityValues.inl
+/// \brief Inline implementations for the QualityValues class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/QualityValues.h"
+#include <algorithm>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Creates an empty list of quality values.
+inline QualityValues::QualityValues()
+    : std::vector<QualityValue>()
+{ }
+
+/// \brief Creates quality values from a FASTQ-encoded string.
+///
+/// One QualityValue is produced per character, via QualityValue::FromFastq.
+inline QualityValues::QualityValues(const std::string& fastqString)
+    : std::vector<QualityValue>()
+{
+    const auto numQuals = fastqString.size();
+    resize(numQuals);
+    auto out = begin();
+    for (auto in = fastqString.cbegin(); in != fastqString.cend(); ++in, ++out)
+        *out = QualityValue::FromFastq(*in);
+}
+
+/// \brief Creates a copy of an existing vector of quality values.
+inline QualityValues::QualityValues(const std::vector<QualityValue>& quals)
+    : std::vector<QualityValue>(quals)
+{ }
+
+/// \brief Creates quality values from raw numbers.
+///
+/// Each number is converted to a QualityValue as it is stored.
+inline QualityValues::QualityValues(const std::vector<uint8_t>& quals)
+    : std::vector<QualityValue>()
+{
+    const auto numQuals = quals.size();
+    resize(numQuals);
+    auto out = begin();
+    for (auto in = quals.cbegin(); in != quals.cend(); ++in, ++out)
+        *out = *in;
+}
+
+/// \brief Creates quality values from the raw numbers in [first, last).
+inline QualityValues::QualityValues(const std::vector<uint8_t>::const_iterator first,
+                                    const std::vector<uint8_t>::const_iterator last)
+    : std::vector<QualityValue>(first, last)
+{ }
+
+/// \brief Creates a copy of the quality values in [first, last).
+inline QualityValues::QualityValues(const QualityValues::const_iterator first,
+                                    const QualityValues::const_iterator last)
+    : std::vector<QualityValue>()
+{
+    // start empty, then take over the requested range
+    this->assign(first, last);
+}
+
+/// \brief Copy constructor.
+inline QualityValues::QualityValues(const QualityValues& other)
+    : std::vector<QualityValue>(other)
+{ }
+
+/// \brief Takes over the contents of a vector of quality values.
+inline QualityValues::QualityValues(std::vector<QualityValue>&& quals)
+    : std::vector<QualityValue>(std::move(quals))
+{ }
+
+/// \brief Move constructor.
+inline QualityValues::QualityValues(QualityValues&& other)
+    : std::vector<QualityValue>(std::move(other))
+{ }
+
+/// \brief Copy assignment; forwards to the base vector's assignment.
+inline QualityValues& QualityValues::operator=(const QualityValues& other)
+{
+    std::vector<QualityValue>::operator=(other);
+    return *this;
+}
+
+/// \brief Copy assignment from a plain vector of quality values.
+inline QualityValues& QualityValues::operator=(const std::vector<QualityValue>& quals)
+{
+    std::vector<QualityValue>::operator=(quals);
+    return *this;
+}
+
+/// \brief Move assignment; forwards to the base vector's move assignment.
+inline QualityValues& QualityValues::operator=(QualityValues&& other)
+{
+    std::vector<QualityValue>::operator=(std::move(other));
+    return *this;
+}
+
+/// \brief Move assignment from a plain vector of quality values.
+inline QualityValues& QualityValues::operator=(std::vector<QualityValue>&& quals)
+{
+    std::vector<QualityValue>::operator=(std::move(quals));
+    return *this;
+}
+
+/// \brief Destructor; nothing to release beyond the underlying vector.
+inline QualityValues::~QualityValues(void)
+{
+}
+
+// Iterator accessors. Each one simply forwards to the std::vector base class.
+
+/// \returns const iterator to the first quality value
+inline std::vector<QualityValue>::const_iterator QualityValues::cbegin(void) const
+{
+    return std::vector<QualityValue>::cbegin();
+}
+
+/// \returns const iterator one past the last quality value
+inline std::vector<QualityValue>::const_iterator QualityValues::cend(void) const
+{
+    return std::vector<QualityValue>::cend();
+}
+
+/// \returns const iterator to the first quality value
+inline std::vector<QualityValue>::const_iterator QualityValues::begin(void) const
+{
+    return std::vector<QualityValue>::begin();
+}
+
+/// \returns const iterator one past the last quality value
+inline std::vector<QualityValue>::const_iterator QualityValues::end(void) const
+{
+    return std::vector<QualityValue>::end();
+}
+
+/// \returns mutable iterator to the first quality value
+inline std::vector<QualityValue>::iterator QualityValues::begin(void)
+{
+    return std::vector<QualityValue>::begin();
+}
+
+/// \returns mutable iterator one past the last quality value
+inline std::vector<QualityValue>::iterator QualityValues::end(void)
+{
+    return std::vector<QualityValue>::end();
+}
+
+/// \brief Named-constructor form of the FASTQ-string constructor.
+/// \param[in] fastq  FASTQ-encoded quality string
+/// \returns the decoded quality values
+inline QualityValues QualityValues::FromFastq(const std::string& fastq)
+{
+    return QualityValues(fastq);
+}
+
+/// \brief Encodes the stored quality values as a FASTQ string.
+/// \returns a string with one character (QualityValue::Fastq) per element,
+///          in container order
+inline std::string QualityValues::Fastq(void) const
+{
+    std::string encoded;
+    encoded.reserve(size());
+    for (const QualityValue& qv : *this)
+        encoded.push_back(qv.Fastq());
+    return encoded;
+}
+
+/// \brief Compares against a FASTQ-encoded string.
+///
+/// The string is decoded into a temporary QualityValues first, then the two
+/// containers are compared element-wise.
+inline bool QualityValues::operator==(const std::string& fastq) const
+{
+    const QualityValues decoded(fastq);
+    return *this == decoded;
+}
+
+/// \brief Negated counterpart of the FASTQ-string equality comparison.
+inline bool QualityValues::operator!=(const std::string& fastq) const
+{
+    const QualityValues decoded(fastq);
+    return *this != decoded;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef QUERYBASE_H
+#define QUERYBASE_H
+
+#include "pbbam/BamFile.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/DataSet.h"
+#include <memory>
+#include <vector>
+#include <cassert>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+template<typename T>
+class QueryBase;
+
+/// \brief Common state and comparison logic shared by QueryIterator and
+///        QueryConstIterator.
+///
+/// The iterator keeps a pointer to the query it reads from plus the most
+/// recently fetched record. Once the query reports no further data the
+/// pointer is reset to null, which is also the state of a default-constructed
+/// ("end") iterator.
+template<typename T>
+class QueryIteratorBase
+{
+public:
+    virtual ~QueryIteratorBase(void);
+
+    /// Two iterators compare equal when they point at the same query
+    /// (or are both detached).
+    bool operator==(const QueryIteratorBase& other) const;
+    bool operator!=(const QueryIteratorBase& other) const;
+
+protected:
+    /// Creates a detached (end) iterator.
+    QueryIteratorBase(void);
+
+    /// Attaches to \p query and fetches the first record.
+    QueryIteratorBase(QueryBase<T>& query);
+
+    /// Fetches the next record; detaches from the query when none is left.
+    void ReadNext(void);
+
+    QueryBase<T>* query_;  ///< query being iterated, null once exhausted
+    T record_;             ///< most recently fetched record
+};
+
+/// \brief Mutable iterator over the records produced by a QueryBase<T>.
+///
+/// Dereferencing yields the record stored in the iterator itself;
+/// incrementing fetches the next record from the query.
+template<typename T>
+class QueryIterator : public QueryIteratorBase<T>
+{
+public:
+    /// Creates a detached (end) iterator.
+    QueryIterator(void);
+
+    /// Attaches to \p query and fetches the first record.
+    QueryIterator(QueryBase<T>& query);
+
+    T& operator*(void);
+    T* operator->(void);
+
+    QueryIterator& operator++(void);   ///< pre-increment
+    QueryIterator operator++(int);     ///< post-increment
+};
+
+/// \brief Read-only iterator over the records produced by a QueryBase<T>.
+///
+/// Behaves like QueryIterator but only exposes const access to the record
+/// stored in the iterator.
+template<typename T>
+class QueryConstIterator : public QueryIteratorBase<T>
+{
+public:
+    /// Creates a detached (end) iterator.
+    QueryConstIterator(void);
+
+    /// Attaches to \p query and fetches the first record.
+    QueryConstIterator(const QueryBase<T>& query);
+
+    const T& operator*(void) const;
+    const T* operator->(void) const;
+
+    QueryConstIterator& operator++(void);   ///< pre-increment
+    QueryConstIterator operator++(int);     ///< post-increment
+};
+
+/// \brief Abstract base for queries that hand out records of type T.
+///
+/// Concrete queries implement GetNext(); the begin()/end() family wraps that
+/// call in iterators so a query can be used in iterator-based loops.
+template<typename T>
+class QueryBase
+{
+public:
+    using iterator       = QueryIterator<T>;
+    using const_iterator = QueryConstIterator<T>;
+
+    virtual ~QueryBase(void);
+
+    // iteration entry points: each begin-style call creates an iterator
+    // attached to this query (which fetches the first record)
+    QueryConstIterator<T> begin(void) const;
+    QueryConstIterator<T> cbegin(void) const;
+    QueryIterator<T> begin(void);
+
+    // end-style calls return detached iterators
+    QueryConstIterator<T> end(void) const;
+    QueryConstIterator<T> cend(void) const;
+    QueryIterator<T> end(void);
+
+    /// Stores the next record in \p r.
+    /// \returns false when no further record is available (the iterators
+    ///          treat a false result as end-of-data)
+    virtual bool GetNext(T& r) =0;
+
+protected:
+    QueryBase(void);
+};
+
+/// Query interface yielding one BamRecord at a time.
+using IQuery = QueryBase<BamRecord>;
+
+/// Query interface yielding a group (vector) of BamRecords at a time.
+using IGroupQuery = QueryBase<std::vector<BamRecord> >;
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#include "pbbam/internal/QueryBase.inl"
+
+#endif // QUERYBASE_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "pbbam/internal/QueryBase.h"
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// -------------------
+// QueryIteratorBase
+// -------------------
+
+/// Creates an iterator that is not attached to any query (the end state).
+template<typename T>
+inline QueryIteratorBase<T>::QueryIteratorBase(void)
+    : query_(nullptr)
+{
+}
+
+/// Attaches to \p query and immediately fetches its first record.
+template<typename T>
+inline QueryIteratorBase<T>::QueryIteratorBase(QueryBase<T>& query)
+    : query_(&query)
+{
+    ReadNext();
+}
+
+template<typename T>
+inline QueryIteratorBase<T>::~QueryIteratorBase(void)
+{
+}
+
+/// Iterators are equal when they refer to the same query; the stored record
+/// is not taken into account.
+template<typename T>
+inline bool QueryIteratorBase<T>::operator==(const QueryIteratorBase<T>& other) const
+{
+    return query_ == other.query_;
+}
+
+template<typename T>
+inline bool QueryIteratorBase<T>::operator!=(const QueryIteratorBase<T>& other) const
+{
+    return query_ != other.query_;
+}
+
+// -------------------
+// QueryIterator
+// -------------------
+
+/// Creates a detached (end) iterator.
+template<typename T>
+inline QueryIterator<T>::QueryIterator(void)
+    : QueryIteratorBase<T>()
+{
+}
+
+/// Attaches to \p query; the base class fetches the first record.
+template<typename T>
+inline QueryIterator<T>::QueryIterator(QueryBase<T>& query)
+    : QueryIteratorBase<T>(query)
+{
+}
+
+/// \returns mutable reference to the record held by this iterator
+template<typename T>
+inline T& QueryIterator<T>::operator*(void)
+{
+    return QueryIteratorBase<T>::record_;
+}
+
+/// \returns pointer to the record held by this iterator
+template<typename T>
+inline T* QueryIterator<T>::operator->(void)
+{
+    return &(QueryIteratorBase<T>::record_);
+}
+
+/// Pre-increment: fetches the next record from the query.
+template<typename T>
+inline QueryIterator<T>& QueryIterator<T>::operator++(void)
+{
+    QueryIteratorBase<T>::ReadNext();
+    return *this;
+}
+
+/// Post-increment: advances this iterator, returning a copy of its prior state.
+template<typename T>
+inline QueryIterator<T> QueryIterator<T>::operator++(int)
+{
+    QueryIterator<T> previous(*this);
+    QueryIteratorBase<T>::ReadNext();
+    return previous;
+}
+
+// --------------------
+// QueryConstIterator
+// --------------------
+
+/// Creates a detached (end) iterator.
+template<typename T>
+inline QueryConstIterator<T>::QueryConstIterator(void)
+    : QueryIteratorBase<T>()
+{
+}
+
+/// Attaches to \p query; the base class fetches the first record.
+///
+/// Constness is cast away because the base class stores a non-const query
+/// pointer and calls the non-const GetNext() on it.
+template<typename T>
+inline QueryConstIterator<T>::QueryConstIterator(const QueryBase<T>& query)
+    : QueryIteratorBase<T>(const_cast<QueryBase<T>&>(query))
+{
+}
+
+/// \returns const reference to the record held by this iterator
+template<typename T>
+inline const T& QueryConstIterator<T>::operator*(void) const
+{
+    return QueryIteratorBase<T>::record_;
+}
+
+/// \returns const pointer to the record held by this iterator
+template<typename T>
+inline const T* QueryConstIterator<T>::operator->(void) const
+{
+    return &(QueryIteratorBase<T>::record_);
+}
+
+/// Pre-increment: fetches the next record from the query.
+template<typename T>
+inline QueryConstIterator<T>& QueryConstIterator<T>::operator++(void)
+{
+    QueryIteratorBase<T>::ReadNext();
+    return *this;
+}
+
+/// Post-increment: advances this iterator, returning a copy of its prior state.
+template<typename T>
+inline QueryConstIterator<T> QueryConstIterator<T>::operator++(int)
+{
+    QueryConstIterator<T> previous(*this);
+    QueryIteratorBase<T>::ReadNext();
+    return previous;
+}
+
+// -----------
+// QueryBase
+// -----------
+
+template<typename T>
+inline QueryBase<T>::QueryBase(void)
+{
+}
+
+template<typename T>
+inline QueryBase<T>::~QueryBase(void)
+{
+}
+
+// begin()/cbegin(): construct an iterator attached to this query. Constructing
+// an attached iterator fetches the first record right away.
+
+template<typename T>
+inline QueryConstIterator<T> QueryBase<T>::begin(void) const
+{
+    return QueryConstIterator<T>(*this);
+}
+
+template<typename T>
+inline QueryIterator<T> QueryBase<T>::begin(void)
+{
+    return QueryIterator<T>(*this);
+}
+
+template<typename T>
+inline QueryConstIterator<T> QueryBase<T>::cbegin(void) const
+{
+    return QueryConstIterator<T>(*this);
+}
+
+// end()/cend(): a default-constructed (detached) iterator marks end-of-data.
+
+template<typename T>
+inline QueryConstIterator<T> QueryBase<T>::cend(void) const
+{
+    return QueryConstIterator<T>();
+}
+
+template<typename T>
+inline QueryConstIterator<T> QueryBase<T>::end(void) const
+{
+    return QueryConstIterator<T>();
+}
+
+template<typename T>
+inline QueryIterator<T> QueryBase<T>::end(void)
+{
+    return QueryIterator<T>();
+}
+
+/// \brief Fetches the next record from the attached query into record_.
+///
+/// Must only be called while attached to a query (asserted). When the query
+/// reports that no record is available, the iterator detaches itself so that
+/// it compares equal to an end iterator.
+template<typename T>
+inline void QueryIteratorBase<T>::ReadNext(void)
+{
+    assert(query_);
+    const bool gotRecord = query_->GetNext(record_);
+    if (!gotRecord)
+        query_ = nullptr;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ReadGroupInfo.inl
+/// \brief Inline implementations for the ReadGroupInfo class.
+//
+// Author: Derek Barnett
+
+#include <stdexcept>
+#include "pbbam/ReadGroupInfo.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \returns the stored barcode count
+/// \throws std::runtime_error if no barcode data is set
+inline size_t ReadGroupInfo::BarcodeCount(void) const
+{
+    if (hasBarcodeData_)
+        return barcodeCount_;
+    throw std::runtime_error("barcode count requested but barcode data is missing");
+}
+
+/// \brief Sets all barcode-related fields at once and marks barcode data as
+///        present.
+///
+/// \param[in] barcodeFile     stored as the barcode file
+/// \param[in] barcodeHash     stored as the barcode hash
+/// \param[in] barcodeCount    stored as the barcode count
+/// \param[in] barcodeMode     stored as the barcode mode
+/// \param[in] barcodeQuality  stored as the barcode quality type
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::BarcodeData(const std::string& barcodeFile,
+                                                 const std::string& barcodeHash,
+                                                 size_t barcodeCount,
+                                                 BarcodeModeType barcodeMode,
+                                                 BarcodeQualityType barcodeQuality)
+{
+    barcodeFile_    = barcodeFile;
+    barcodeHash_    = barcodeHash;
+    barcodeCount_   = barcodeCount;
+    barcodeMode_    = barcodeMode;
+    barcodeQuality_ = barcodeQuality;
+
+    // from now on the barcode getters no longer throw
+    hasBarcodeData_ = true;
+    return *this;
+}
+
+/// \returns the stored barcode file
+/// \throws std::runtime_error if no barcode data is set
+inline std::string ReadGroupInfo::BarcodeFile(void) const
+{
+    if (hasBarcodeData_)
+        return barcodeFile_;
+    throw std::runtime_error("barcode file requested but barcode data is missing");
+}
+
+/// \returns the stored barcode hash
+/// \throws std::runtime_error if no barcode data is set
+inline std::string ReadGroupInfo::BarcodeHash(void) const
+{
+    if (hasBarcodeData_)
+        return barcodeHash_;
+    throw std::runtime_error("barcode hash requested but barcode data is missing");
+}
+
+/// \returns the stored barcode mode
+/// \throws std::runtime_error if no barcode data is set
+inline BarcodeModeType ReadGroupInfo::BarcodeMode(void) const
+{
+    if (hasBarcodeData_)
+        return barcodeMode_;
+    throw std::runtime_error("barcode mode requested but barcode data is missing");
+}
+
+/// \returns the stored barcode quality type
+/// \throws std::runtime_error if no barcode data is set
+inline BarcodeQualityType ReadGroupInfo::BarcodeQuality(void) const
+{
+    if (hasBarcodeData_)
+        return barcodeQuality_;
+    throw std::runtime_error("barcode quality requested but barcode data is missing");
+}
+
+/// \returns the stored basecaller version
+inline std::string ReadGroupInfo::BasecallerVersion(void) const
+{
+    return basecallerVersion_;
+}
+
+/// \brief Sets the basecaller version.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::BasecallerVersion(const std::string& versionNumber)
+{
+    basecallerVersion_ = versionNumber;
+    return *this;
+}
+
+/// \brief Looks up the tag registered for \p feature.
+/// \returns the registered tag, or an empty string if the feature has none
+inline std::string ReadGroupInfo::BaseFeatureTag(const BaseFeature& feature) const
+{
+    const auto found = features_.find(feature);
+    return (found != features_.end()) ? found->second : std::string();
+}
+
+/// \brief Registers (or overwrites) the tag for \p feature.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::BaseFeatureTag(const BaseFeature& feature,
+                                                    const std::string& tag)
+{
+    features_[feature] = tag;
+    return *this;
+}
+
+/// \returns the stored binding kit
+inline std::string ReadGroupInfo::BindingKit(void) const
+{
+    return bindingKit_;
+}
+
+/// \brief Sets the binding kit.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::BindingKit(const std::string& kitNumber)
+{
+    bindingKit_ = kitNumber;
+    return *this;
+}
+
+/// \brief Marks barcode data as absent and empties the barcode file/hash.
+///
+/// The barcode count, mode and quality members are left untouched; the
+/// corresponding getters throw while barcode data is flagged as absent.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::ClearBarcodeData(void)
+{
+    barcodeFile_.clear();
+    barcodeHash_.clear();
+    hasBarcodeData_ = false;
+    return *this;
+}
+
+/// \brief Removes every registered base feature tag.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::ClearBaseFeatures(void)
+{
+    features_.clear();
+    return *this;
+}
+
+/// \returns the stored control flag
+inline bool ReadGroupInfo::Control(void) const
+{
+    return control_;
+}
+
+/// \brief Sets the control flag.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::Control(const bool ctrl)
+{
+    control_ = ctrl;
+    return *this;
+}
+
+/// \returns a copy of the custom tag map
+inline std::map<std::string, std::string> ReadGroupInfo::CustomTags(void) const
+{
+    return custom_;
+}
+
+/// \brief Replaces the custom tag map with \p custom.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::CustomTags(const std::map<std::string, std::string>& custom)
+{
+    custom_ = custom;
+    return *this;
+}
+
+/// \returns the stored date string
+inline std::string ReadGroupInfo::Date(void) const
+{
+    return date_;
+}
+
+/// \brief Sets the date string.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::Date(const std::string& date)
+{
+    date_ = date;
+    return *this;
+}
+
+/// \returns the stored flow order string
+inline std::string ReadGroupInfo::FlowOrder(void) const
+{
+    return flowOrder_;
+}
+
+/// \brief Sets the flow order string.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::FlowOrder(const std::string& order)
+{
+    flowOrder_ = order;
+    return *this;
+}
+
+/// \returns the stored frame rate (kept as a string)
+inline std::string ReadGroupInfo::FrameRateHz(void) const
+{
+    return frameRateHz_;
+}
+
+/// \brief Sets the frame rate string.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::FrameRateHz(const std::string& frameRateHz)
+{
+    frameRateHz_ = frameRateHz;
+    return *this;
+}
+
+/// \returns true if barcode data has been set (and not cleared since)
+inline bool ReadGroupInfo::HasBarcodeData(void) const
+{
+    return hasBarcodeData_;
+}
+
+/// \returns true if a tag is registered for \p feature
+inline bool ReadGroupInfo::HasBaseFeature(const BaseFeature& feature) const
+{
+    const auto found = features_.find(feature);
+    return found != features_.end();
+}
+
+/// \returns the stored read group ID
+inline std::string ReadGroupInfo::Id(void) const
+{
+    return id_;
+}
+
+/// \brief Sets the read group ID directly.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::Id(const std::string& id)
+{
+    id_ = id;
+    return *this;
+}
+
+/// \brief Sets the read group ID to the value MakeReadGroupId() produces for
+///        the given movie name and read type.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::Id(const std::string& movieName,
+                                        const std::string& readType)
+{
+    id_ = MakeReadGroupId(movieName, readType);
+    return *this;
+}
+
+/// \brief Converts a read group ID string to its numeric form.
+///
+/// The ID is parsed as a hexadecimal number with std::stoul, narrowed to
+/// 32 bits and then reinterpreted as a signed value.
+/// \param[in] rgId  read group ID string (hexadecimal digits)
+/// \returns the ID as int32_t
+inline int32_t ReadGroupInfo::IdToInt(const std::string& rgId)
+{
+    const uint32_t parsed = std::stoul(rgId, nullptr, 16);
+    return static_cast<int32_t>(parsed);
+}
+
+/// \returns the stored IPD frame codec
+inline FrameCodec ReadGroupInfo::IpdCodec(void) const
+{
+    return ipdCodec_;
+}
+
+/// \returns true if the read group has a non-empty ID
+inline bool ReadGroupInfo::IsValid(void) const
+{
+    const bool idMissing = id_.empty();
+    return !idMissing;
+}
+
+/// \returns the stored key sequence
+inline std::string ReadGroupInfo::KeySequence(void) const
+{
+    return keySequence_;
+}
+
+/// \brief Sets the key sequence.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::KeySequence(const std::string& sequence)
+{
+    keySequence_ = sequence;
+    return *this;
+}
+
+/// \returns the stored library name
+inline std::string ReadGroupInfo::Library(void) const
+{
+    return library_;
+}
+
+/// \brief Sets the library name.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::Library(const std::string& library)
+{
+    library_ = library;
+    return *this;
+}
+
+/// \returns the stored movie name
+inline std::string ReadGroupInfo::MovieName(void) const
+{
+    return movieName_;
+}
+
+/// \brief Sets the movie name.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::MovieName(const std::string& movieName)
+{
+    movieName_ = movieName;
+    return *this;
+}
+
+/// \returns the platform name, which is always "PACBIO"
+inline std::string ReadGroupInfo::Platform(void) const
+{
+    return "PACBIO";
+}
+
+/// \returns the stored platform model
+inline PlatformModelType ReadGroupInfo::PlatformModel(void) const
+{
+    return platformModel_;
+}
+
+/// \brief Sets the platform model.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::PlatformModel(const PlatformModelType& platform)
+{
+    platformModel_ = platform;
+    return *this;
+}
+
+/// \returns the stored predicted insert size (kept as a string)
+inline std::string ReadGroupInfo::PredictedInsertSize(void) const
+{
+    return predictedInsertSize_;
+}
+
+/// \brief Sets the predicted insert size string.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::PredictedInsertSize(const std::string& size)
+{
+    predictedInsertSize_ = size;
+    return *this;
+}
+
+/// \returns the stored programs string
+inline std::string ReadGroupInfo::Programs(void) const
+{
+    return programs_;
+}
+
+/// \brief Sets the programs string.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::Programs(const std::string& programs)
+{
+    programs_ = programs;
+    return *this;
+}
+
+/// \returns the stored pulse width frame codec
+inline FrameCodec ReadGroupInfo::PulseWidthCodec(void) const
+{
+    return pulseWidthCodec_;
+}
+
+/// \returns the stored read type
+inline std::string ReadGroupInfo::ReadType(void) const
+{
+    return readType_;
+}
+
+/// \brief Sets the read type.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::ReadType(const std::string& type)
+{
+    readType_ = type;
+    return *this;
+}
+
+/// \brief Removes the tag registered for \p feature, if there is one.
+///
+/// Does nothing when the feature is not registered.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::RemoveBaseFeature(const BaseFeature& feature)
+{
+    // erase-by-key is a no-op for keys that are not present
+    features_.erase(feature);
+    return *this;
+}
+
+/// \returns the stored sample name
+inline std::string ReadGroupInfo::Sample(void) const
+{
+    return sample_;
+}
+
+/// \brief Sets the sample name.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::Sample(const std::string& sample)
+{
+    sample_ = sample;
+    return *this;
+}
+
+/// \returns the stored sequencing center
+inline std::string ReadGroupInfo::SequencingCenter(void) const
+{
+    return sequencingCenter_;
+}
+
+/// \brief Sets the sequencing center.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::SequencingCenter(const std::string& center)
+{
+    sequencingCenter_ = center;
+    return *this;
+}
+
+/// \brief Derives the sequencing chemistry from this read group's binding
+///        kit, sequencing kit and basecaller version.
+/// \returns whatever SequencingChemistryFromTriple() yields for that triple
+inline std::string ReadGroupInfo::SequencingChemistry(void) const
+{
+    return SequencingChemistryFromTriple(BindingKit(), SequencingKit(), BasecallerVersion());
+}
+
+/// \returns the stored sequencing kit
+inline std::string ReadGroupInfo::SequencingKit(void) const
+{
+    return sequencingKit_;
+}
+
+/// \brief Sets the sequencing kit.
+/// \returns reference to this object, to allow call chaining
+inline ReadGroupInfo& ReadGroupInfo::SequencingKit(const std::string& kitNumber)
+{
+    sequencingKit_ = kitNumber;
+    return *this;
+}
+
+/// \brief Static convenience wrapper around the member ToSam().
+/// \param[in] rg  read group to convert
+/// \returns the result of rg.ToSam()
+inline std::string ReadGroupInfo::ToSam(const ReadGroupInfo& rg)
+{
+    return rg.ToSam();
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file SequenceInfo.inl
+/// \brief Inline implementations for the SequenceInfo class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/SequenceInfo.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Field-by-field equality comparison.
+///
+/// Two SequenceInfo objects are equal when assembly ID, checksum, length,
+/// name, species, URI and custom tags all match.
+inline bool SequenceInfo::operator==(const SequenceInfo& other) const
+{
+    if (assemblyId_ != other.assemblyId_) return false;
+    if (checksum_   != other.checksum_)   return false;
+    if (length_     != other.length_)     return false;
+    if (name_       != other.name_)       return false;
+    if (species_    != other.species_)    return false;
+    if (uri_        != other.uri_)        return false;
+    return custom_ == other.custom_;
+}
+
+/// \brief Negation of operator==.
+inline bool SequenceInfo::operator!=(const SequenceInfo& other) const
+{
+    const bool same = (*this == other);
+    return !same;
+}
+
+/// \returns the stored assembly ID
+inline std::string SequenceInfo::AssemblyId(void) const
+{
+    return assemblyId_;
+}
+
+/// \brief Sets the assembly ID.
+/// \returns reference to this object, to allow call chaining
+inline SequenceInfo& SequenceInfo::AssemblyId(const std::string& id)
+{
+    assemblyId_ = id;
+    return *this;
+}
+
+/// \returns the stored checksum
+inline std::string SequenceInfo::Checksum(void) const
+{
+    return checksum_;
+}
+
+/// \brief Sets the checksum.
+/// \returns reference to this object, to allow call chaining
+inline SequenceInfo& SequenceInfo::Checksum(const std::string& checksum)
+{
+    checksum_ = checksum;
+    return *this;
+}
+
+/// \returns a copy of the custom tag map
+inline std::map<std::string, std::string> SequenceInfo::CustomTags(void) const
+{
+    return custom_;
+}
+
+/// \brief Replaces the custom tag map with \p custom.
+/// \returns reference to this object, to allow call chaining
+inline SequenceInfo& SequenceInfo::CustomTags(const std::map<std::string, std::string>& custom)
+{
+    custom_ = custom;
+    return *this;
+}
+
+/// \returns the stored sequence length (kept as a string)
+inline std::string SequenceInfo::Length(void) const
+{
+    return length_;
+}
+
+/// \brief Sets the sequence length string.
+/// \returns reference to this object, to allow call chaining
+inline SequenceInfo& SequenceInfo::Length(const std::string& length)
+{
+    length_ = length;
+    return *this;
+}
+
+/// \returns the stored sequence name
+inline std::string SequenceInfo::Name(void) const
+{
+    return name_;
+}
+
+/// \brief Sets the sequence name.
+/// \returns reference to this object, to allow call chaining
+inline SequenceInfo& SequenceInfo::Name(const std::string& name)
+{
+    name_ = name;
+    return *this;
+}
+
+/// \returns the stored species
+inline std::string SequenceInfo::Species(void) const
+{
+    return species_;
+}
+
+/// \brief Sets the species.
+/// \returns reference to this object, to allow call chaining
+inline SequenceInfo& SequenceInfo::Species(const std::string& species)
+{
+    species_ = species;
+    return *this;
+}
+
+/// \brief Static convenience wrapper around the member ToSam().
+/// \param[in] seq  sequence info to convert
+/// \returns the result of seq.ToSam()
+inline std::string SequenceInfo::ToSam(const SequenceInfo& seq)
+{
+    return seq.ToSam();
+}
+
+/// \returns the stored URI
+inline std::string SequenceInfo::Uri(void) const
+{
+    return uri_;
+}
+
+/// \brief Sets the URI.
+/// \returns reference to this object, to allow call chaining
+inline SequenceInfo& SequenceInfo::Uri(const std::string& uri)
+{
+    uri_ = uri;
+    return *this;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Tag.inl
+/// \brief Inline implementations for the Tag class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Tag.h"
+#include <boost/numeric/conversion/cast.hpp>
+#include <iostream>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief Checks whether \p x is the code of a printable, non-space ASCII
+///        character, i.e. lies in the closed range [33, 126] ('!' through '~').
+///
+/// 127 (DEL) is a control character rather than a printable one, so it is
+/// rejected along with everything below '!' and above '~'.
+///
+/// \param[in] x  numeric value to test
+/// \returns true if \p x is within [33, 126]
+template<typename T>
+inline bool InAsciiRange(const T& x)
+{ return (x >= 33 && x <= 126); }
+
+/// \brief Variant visitor that converts an integral tag value to a char.
+///
+/// Integral alternatives are range-checked with InAsciiRange() and throw
+/// std::runtime_error("not valid ASCII") when the check fails. Every other
+/// alternative throws std::runtime_error("conversion not supported").
+struct AsciiConvertVisitor : public boost::static_visitor<char>
+{
+    // integral alternatives: checked conversion
+    char operator() (const int8_t& x)   const { return CheckedCast(x); }
+    char operator() (const uint8_t& x)  const { return CheckedCast(x); }
+    char operator() (const int16_t& x)  const { return CheckedCast(x); }
+    char operator() (const uint16_t& x) const { return CheckedCast(x); }
+    char operator() (const int32_t& x)  const { return CheckedCast(x); }
+    char operator() (const uint32_t& x) const { return CheckedCast(x); }
+
+    // all remaining alternatives: conversion is rejected
+    template<typename T>
+    char operator()(const T&) const
+    {
+        throw std::runtime_error("conversion not supported");
+        return 0;
+    }
+
+private:
+    /// Returns \p x as a char if it passes InAsciiRange(), throws otherwise.
+    template<typename T>
+    char CheckedCast(const T& x) const
+    {
+        if (InAsciiRange(x))
+            return static_cast<char>(x);
+        throw std::runtime_error("not valid ASCII");
+    }
+};
+
+/// \brief Variant visitor that converts an integral tag value to DesiredType.
+///
+/// Integral alternatives go through boost::numeric_cast. Every other
+/// alternative throws std::runtime_error with a message naming the source
+/// and target types.
+template<typename DesiredType>
+struct NumericConvertVisitor : public boost::static_visitor<DesiredType>
+{
+    // integral alternatives: checked numeric conversion
+    DesiredType operator() (const int8_t& x)   const { return boost::numeric_cast<DesiredType>(x); }
+    DesiredType operator() (const uint8_t& x)  const { return boost::numeric_cast<DesiredType>(x); }
+    DesiredType operator() (const int16_t& x)  const { return boost::numeric_cast<DesiredType>(x); }
+    DesiredType operator() (const uint16_t& x) const { return boost::numeric_cast<DesiredType>(x); }
+    DesiredType operator() (const int32_t& x)  const { return boost::numeric_cast<DesiredType>(x); }
+    DesiredType operator() (const uint32_t& x) const { return boost::numeric_cast<DesiredType>(x); }
+
+    // all remaining alternatives: conversion is rejected
+    template<typename T>
+    DesiredType operator()(const T& t) const
+    {
+        // message reads "conversion not supported: <from> -> <to>", using
+        // the implementation-defined names reported by typeid
+        std::string msg("conversion not supported: ");
+        msg += typeid(t).name();
+        msg += " -> ";
+        msg += typeid(DesiredType).name();
+        throw std::runtime_error(msg);
+        return 0;
+    }
+};
+
+// Convenience aliases: one NumericConvertVisitor per integral target type.
+using ToInt8ConvertVisitor   = NumericConvertVisitor<int8_t>;
+using ToUInt8ConvertVisitor  = NumericConvertVisitor<uint8_t>;
+using ToInt16ConvertVisitor  = NumericConvertVisitor<int16_t>;
+using ToUInt16ConvertVisitor = NumericConvertVisitor<uint16_t>;
+using ToInt32ConvertVisitor  = NumericConvertVisitor<int32_t>;
+using ToUInt32ConvertVisitor = NumericConvertVisitor<uint32_t>;
+
+/// \brief Binary variant visitor used to compare two tag values.
+///
+/// Values compare equal only when both hold the same alternative and the
+/// held values are equal; no conversion between types is attempted.
+struct IsEqualVisitor : public boost::static_visitor<bool>
+{
+    /// Two empty (blank) values are always equal.
+    bool operator() (const boost::blank&, const boost::blank&) const
+    {
+        return true;
+    }
+
+    /// Same alternative on both sides: defer to the type's own operator==.
+    template <typename T>
+    bool operator() (const T& lhs, const T& rhs) const
+    {
+        return lhs == rhs;
+    }
+
+    /// Different alternatives: never equal.
+    /// (maybe allow conversions down the road? for now, just fail)
+    template <typename T, typename U>
+    bool operator() (const T&, const U&) const
+    {
+        return false;
+    }
+};
+
+/// \brief Variant visitor that yields a display name for the held
+///        alternative.
+struct TypenameVisitor : public boost::static_visitor<std::string>
+{
+    // empty value
+    std::string operator() (const boost::blank&) const { return "none"; }
+
+    // scalar values
+    std::string operator() (const int8_t&)      const { return "int8_t"; }
+    std::string operator() (const uint8_t&)     const { return "uint8_t"; }
+    std::string operator() (const int16_t&)     const { return "int16_t"; }
+    std::string operator() (const uint16_t&)    const { return "uint16_t"; }
+    std::string operator() (const int32_t&)     const { return "int32_t"; }
+    std::string operator() (const uint32_t&)    const { return "uint32_t"; }
+    std::string operator() (const float&)       const { return "float"; }
+    std::string operator() (const std::string&) const { return "string"; }
+
+    // array values
+    std::string operator() (const std::vector<int8_t>&)   const { return "vector<int8_t>"; }
+    std::string operator() (const std::vector<uint8_t>&)  const { return "vector<uint8_t>"; }
+    std::string operator() (const std::vector<int16_t>&)  const { return "vector<int16_t>"; }
+    std::string operator() (const std::vector<uint16_t>&) const { return "vector<uint16_t>"; }
+    std::string operator() (const std::vector<int32_t>&)  const { return "vector<int32_t>"; }
+    std::string operator() (const std::vector<uint32_t>&) const { return "vector<uint32_t>"; }
+    std::string operator() (const std::vector<float>&)    const { return "vector<float>"; }
+};
+
+} // namespace internal
+
+/// \brief Tags are equal when their data compare equal under IsEqualVisitor
+///        and they carry the same modifier.
+inline bool Tag::operator== (const Tag& other) const
+{
+    const bool sameData =
+        boost::apply_visitor(internal::IsEqualVisitor(), data_, other.data_);
+    return sameData && (modifier_ == other.modifier_);
+}
+
+/// \brief Negation of operator==.
+inline bool Tag::operator!= (const Tag& other) const
+{
+    const bool same = (*this == other);
+    return !same;
+}
+
+/// \brief Checks whether this tag carries modifier \p m.
+///
+/// A tag stores a single modifier (for now at least), so this is a plain
+/// equality test.
+inline bool Tag::HasModifier(const TagModifier m) const
+{
+    const bool matches = (modifier_ == m);
+    return matches;
+}
+
+// Scalar type predicates: each compares Type() against one TagDataType value.
+
+/// \returns true if the tag's type is TagDataType::INVALID
+inline bool Tag::IsNull(void) const
+{
+    return Type() == TagDataType::INVALID;
+}
+
+/// \returns true if the tag's type is TagDataType::INT8
+inline bool Tag::IsInt8(void) const
+{
+    return Type() == TagDataType::INT8;
+}
+
+/// \returns true if the tag's type is TagDataType::UINT8
+inline bool Tag::IsUInt8(void) const
+{
+    return Type() == TagDataType::UINT8;
+}
+
+/// \returns true if the tag's type is TagDataType::INT16
+inline bool Tag::IsInt16(void) const
+{
+    return Type() == TagDataType::INT16;
+}
+
+/// \returns true if the tag's type is TagDataType::UINT16
+inline bool Tag::IsUInt16(void) const
+{
+    return Type() == TagDataType::UINT16;
+}
+
+/// \returns true if the tag's type is TagDataType::INT32
+inline bool Tag::IsInt32(void) const
+{
+    return Type() == TagDataType::INT32;
+}
+
+/// \returns true if the tag's type is TagDataType::UINT32
+inline bool Tag::IsUInt32(void) const
+{
+    return Type() == TagDataType::UINT32;
+}
+
+/// \returns true if the tag's type is TagDataType::FLOAT
+inline bool Tag::IsFloat(void) const
+{
+    return Type() == TagDataType::FLOAT;
+}
+
+/// \returns true if the tag's type is TagDataType::STRING
+inline bool Tag::IsString(void) const
+{
+    return Type() == TagDataType::STRING;
+}
+
+/// \returns true if the tag is a string carrying the HEX_STRING modifier
+inline bool Tag::IsHexString(void) const
+{
+    if (!IsString())
+        return false;
+    return modifier_ == TagModifier::HEX_STRING;
+}
+
+inline bool Tag::IsInt8Array(void) const
+{ return Type() == TagDataType::INT8_ARRAY; }
+
+inline bool Tag::IsUInt8Array(void) const
+{ return Type() == TagDataType::UINT8_ARRAY; }
+
+inline bool Tag::IsInt16Array(void) const
+{ return Type() == TagDataType::INT16_ARRAY; }
+
+inline bool Tag::IsUInt16Array(void) const
+{ return Type() == TagDataType::UINT16_ARRAY; }
+
+// True when the stored value's type id is TagDataType::INT32_ARRAY.
+inline bool Tag::IsInt32Array() const
+{
+    const TagDataType current = Type();
+    return current == TagDataType::INT32_ARRAY;
+}
+
+// True when the stored value's type id is TagDataType::UINT32_ARRAY.
+inline bool Tag::IsUInt32Array() const
+{
+    const TagDataType current = Type();
+    return current == TagDataType::UINT32_ARRAY;
+}
+
+// True when the stored value's type id is TagDataType::FLOAT_ARRAY.
+inline bool Tag::IsFloatArray() const
+{
+    const TagDataType current = Type();
+    return current == TagDataType::FLOAT_ARRAY;
+}
+
+// True for any signed scalar integer: int8, int16 or int32.
+inline bool Tag::IsSignedInt() const
+{
+    if (IsInt8())
+        return true;
+    if (IsInt16())
+        return true;
+    return IsInt32();
+}
+
+// True for any unsigned scalar integer: uint8, uint16 or uint32.
+inline bool Tag::IsUnsignedInt() const
+{
+    if (IsUInt8())
+        return true;
+    if (IsUInt16())
+        return true;
+    return IsUInt32();
+}
+
+// True for any scalar integer, signed or unsigned.
+inline bool Tag::IsIntegral() const
+{
+    if (IsSignedInt())
+        return true;
+    return IsUnsignedInt();
+}
+
+// True for any scalar number: an integer of any width/signedness, or a float.
+inline bool Tag::IsNumeric() const
+{
+    if (IsIntegral())
+        return true;
+    return IsFloat();
+}
+
+// True for an array of signed integers: int8, int16 or int32 elements.
+inline bool Tag::IsSignedArray() const
+{
+    if (IsInt8Array())
+        return true;
+    if (IsInt16Array())
+        return true;
+    return IsInt32Array();
+}
+
+// True for an array of unsigned integers: uint8, uint16 or uint32 elements.
+inline bool Tag::IsUnsignedArray() const
+{
+    if (IsUInt8Array())
+        return true;
+    if (IsUInt16Array())
+        return true;
+    return IsUInt32Array();
+}
+
+// True for any integer array, signed or unsigned.
+inline bool Tag::IsIntegralArray() const
+{
+    if (IsSignedArray())
+        return true;
+    return IsUnsignedArray();
+}
+
+// True for any array type: integer arrays or a float array.
+inline bool Tag::IsArray() const
+{
+    if (IsIntegralArray())
+        return true;
+    return IsFloatArray();
+}
+
+// Getter: returns the modifier currently attached to this tag.
+inline TagModifier Tag::Modifier() const
+{
+    return modifier_;
+}
+
+// Setter: replaces the tag's modifier with \p m and returns *this so
+// calls can be chained.
+inline Tag& Tag::Modifier(const TagModifier m)
+{
+    modifier_ = m;
+    return *this;
+}
+
+// Converts the stored value to a char by dispatching AsciiConvertVisitor
+// over data_; the conversion rules live in that visitor.
+inline char Tag::ToAscii() const
+{
+    return boost::apply_visitor(internal::AsciiConvertVisitor(), data_);
+}
+
+// Returns the value as int8_t. An exact int8 is read straight from the
+// variant; any other stored type goes through ToInt8ConvertVisitor.
+inline int8_t Tag::ToInt8() const
+{
+    if (!IsInt8())
+        return boost::apply_visitor(internal::ToInt8ConvertVisitor(), data_);
+    return boost::get<int8_t>(data_);
+}
+
+// Returns the value as uint8_t. An exact uint8 is read straight from the
+// variant; any other stored type goes through ToUInt8ConvertVisitor.
+inline uint8_t Tag::ToUInt8() const
+{
+    if (!IsUInt8())
+        return boost::apply_visitor(internal::ToUInt8ConvertVisitor(), data_);
+    return boost::get<uint8_t>(data_);
+}
+
+// Returns the value as int16_t. An exact int16 is read straight from the
+// variant; any other stored type goes through ToInt16ConvertVisitor.
+inline int16_t Tag::ToInt16() const
+{
+    if (!IsInt16())
+        return boost::apply_visitor(internal::ToInt16ConvertVisitor(), data_);
+    return boost::get<int16_t>(data_);
+}
+
+// Returns the value as uint16_t. An exact uint16 is read straight from the
+// variant; any other stored type goes through ToUInt16ConvertVisitor.
+inline uint16_t Tag::ToUInt16() const
+{
+    if (!IsUInt16())
+        return boost::apply_visitor(internal::ToUInt16ConvertVisitor(), data_);
+    return boost::get<uint16_t>(data_);
+}
+
+// Returns the value as int32_t. An exact int32 is read straight from the
+// variant; any other stored type goes through ToInt32ConvertVisitor.
+inline int32_t Tag::ToInt32() const
+{
+    if (!IsInt32())
+        return boost::apply_visitor(internal::ToInt32ConvertVisitor(), data_);
+    return boost::get<int32_t>(data_);
+}
+
+// Returns the value as uint32_t. An exact uint32 is read straight from the
+// variant; any other stored type goes through ToUInt32ConvertVisitor.
+inline uint32_t Tag::ToUInt32() const
+{
+    if (!IsUInt32())
+        return boost::apply_visitor(internal::ToUInt32ConvertVisitor(), data_);
+    return boost::get<uint32_t>(data_);
+}
+
+// Returns the stored float. Unlike the integer accessors there is no
+// converting visitor here: boost::get throws boost::bad_get when data_
+// holds any other type.
+inline float Tag::ToFloat() const
+{
+    const float& stored = boost::get<float>(data_);
+    return stored;
+}
+
+// Returns a copy of the stored string. No conversion is attempted:
+// boost::get throws boost::bad_get when data_ holds any other type.
+inline std::string Tag::ToString() const
+{
+    const std::string& stored = boost::get<std::string>(data_);
+    return stored;
+}
+
+// Returns a copy of the stored int8_t array; boost::get throws
+// boost::bad_get when data_ holds any other type.
+inline std::vector<int8_t> Tag::ToInt8Array() const
+{
+    const std::vector<int8_t>& stored = boost::get< std::vector<int8_t> >(data_);
+    return stored;
+}
+
+// Returns a copy of the stored uint8_t array; boost::get throws
+// boost::bad_get when data_ holds any other type.
+inline std::vector<uint8_t> Tag::ToUInt8Array() const
+{
+    const std::vector<uint8_t>& stored = boost::get< std::vector<uint8_t> >(data_);
+    return stored;
+}
+
+// Returns a copy of the stored int16_t array; boost::get throws
+// boost::bad_get when data_ holds any other type.
+inline std::vector<int16_t> Tag::ToInt16Array() const
+{
+    const std::vector<int16_t>& stored = boost::get< std::vector<int16_t> >(data_);
+    return stored;
+}
+
+// Returns a copy of the stored uint16_t array; boost::get throws
+// boost::bad_get when data_ holds any other type.
+inline std::vector<uint16_t> Tag::ToUInt16Array() const
+{
+    const std::vector<uint16_t>& stored = boost::get< std::vector<uint16_t> >(data_);
+    return stored;
+}
+
+// Returns a copy of the stored int32_t array; boost::get throws
+// boost::bad_get when data_ holds any other type.
+inline std::vector<int32_t> Tag::ToInt32Array() const
+{
+    const std::vector<int32_t>& stored = boost::get< std::vector<int32_t> >(data_);
+    return stored;
+}
+
+// Returns a copy of the stored uint32_t array; boost::get throws
+// boost::bad_get when data_ holds any other type.
+inline std::vector<uint32_t> Tag::ToUInt32Array() const
+{
+    const std::vector<uint32_t>& stored = boost::get< std::vector<uint32_t> >(data_);
+    return stored;
+}
+
+// Returns a copy of the stored float array; boost::get throws
+// boost::bad_get when data_ holds any other type.
+inline std::vector<float> Tag::ToFloatArray() const
+{
+    const std::vector<float>& stored = boost::get< std::vector<float> >(data_);
+    return stored;
+}
+
+// Derives the tag's type id from the index of the variant's active
+// alternative (data_.which()).
+// NOTE(review): assumes TagDataType's enumerator values line up with the
+// order of the variant's alternatives - confirm against the Tag header.
+inline TagDataType Tag::Type() const
+{
+    return static_cast<TagDataType>(data_.which());
+}
+
+// Returns a printable name for the stored type, as produced by
+// TypenameVisitor (e.g. "vector<float>" for a float array).
+inline std::string Tag::Typename() const
+{
+    return boost::apply_visitor(internal::TypenameVisitor(), data_);
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Validator.inl
+/// \brief Inline implementations for the Validator class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Validator.h"
+#include <stdexcept>
+
+namespace PacBio {
+namespace BAM {
+
+// Non-throwing validity check for a BAM file.
+//
+// Runs either the whole-file validation or the metadata-only validation,
+// depending on \p entireFile, and reports any std::exception raised by it
+// as 'false'. Exceptions not derived from std::exception still propagate.
+inline bool Validator::IsValid(const BamFile& file, const bool entireFile)
+{
+    bool valid = true;
+    try {
+        // NOTE(review): the literal 1 is presumably the max error count, so
+        // validation can stop at the first problem - confirm in Validator.h
+        if (entireFile)
+            ValidateEntireFile(file, 1);
+        else
+            ValidateFileMetadata(file, 1);
+    } catch (const std::exception&) {
+        valid = false;
+    }
+    return valid;
+}
+
+// Non-throwing validity check for a BAM header: any std::exception raised
+// by Validate() is reported as 'false'.
+inline bool Validator::IsValid(const BamHeader& header)
+{
+    bool valid = true;
+    try {
+        // NOTE(review): the literal 1 is presumably the max error count -
+        // confirm in Validator.h
+        Validate(header, 1);
+    } catch (const std::exception&) {
+        valid = false;
+    }
+    return valid;
+}
+
+// Non-throwing validity check for a single BAM record: any std::exception
+// raised by Validate() is reported as 'false'.
+inline bool Validator::IsValid(const BamRecord& record)
+{
+    bool valid = true;
+    try {
+        // NOTE(review): the literal 1 is presumably the max error count -
+        // confirm in Validator.h
+        Validate(record, 1);
+    } catch (const std::exception&) {
+        valid = false;
+    }
+    return valid;
+}
+
+// Non-throwing validity check for a read group: any std::exception raised
+// by Validate() is reported as 'false'.
+inline bool Validator::IsValid(const ReadGroupInfo& rg)
+{
+    bool valid = true;
+    try {
+        // NOTE(review): the literal 1 is presumably the max error count -
+        // confirm in Validator.h
+        Validate(rg, 1);
+    } catch (const std::exception&) {
+        valid = false;
+    }
+    return valid;
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualPolymeraseBamRecord.h
+/// \brief Defines the VirtualPolymeraseBamRecord class.
+//
+// Author: Armin Töpfer
+
+#ifndef VIRTUALPOLYMERASEBAMRECORD_H
+#define VIRTUALPOLYMERASEBAMRECORD_H
+
+#include "pbbam/virtual/VirtualZmwBamRecord.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Legacy name, kept as a plain alias of VirtualZmwBamRecord.
+///
+/// \deprecated Use VirtualZmwBamRecord instead.
+typedef VirtualZmwBamRecord VirtualPolymeraseBamRecord;
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // VIRTUALPOLYMERASEBAMRECORD_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualPolymeraseCompositeReader.h
+/// \brief Defines the VirtualPolymeraseCompositeReader class.
+//
+// Author: Derek Barnett
+
+#ifndef VIRTUALPOLYMERASECOMPOSITEREADER_H
+#define VIRTUALPOLYMERASECOMPOSITEREADER_H
+
+#include "pbbam/virtual/VirtualPolymeraseBamRecord.h"
+#include "pbbam/virtual/ZmwReadStitcher.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Legacy name, kept as a plain alias of ZmwReadStitcher.
+///
+/// \deprecated Use ZmwReadStitcher instead.
+typedef ZmwReadStitcher VirtualPolymeraseCompositeReader;
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // VIRTUALPOLYMERASECOMPOSITEREADER_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualPolymeraseReader.h
+/// \brief Defines the VirtualPolymeraseReader class.
+//
+// Author: Armin Töpfer
+
+#ifndef VIRTUALPOLYMERASEREADER_H
+#define VIRTUALPOLYMERASEREADER_H
+
+#include "pbbam/virtual/VirtualPolymeraseBamRecord.h"
+#include "pbbam/virtual/ZmwReadStitcher.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Legacy name, kept as a plain alias of ZmwReadStitcher.
+///
+/// \deprecated Use ZmwReadStitcher instead.
+typedef ZmwReadStitcher VirtualPolymeraseReader;
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // VIRTUALPOLYMERASEREADER_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualRegion.h
+/// \brief Defines the VirtualRegion class.
+//
+// Author: Armin Töpfer
+
+#ifndef VIRTUALREGION_H
+#define VIRTUALREGION_H
+
+#include "pbbam/Config.h"
+#include "pbbam/virtual/VirtualRegionType.h"
+#include "pbbam/LocalContextFlags.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The VirtualRegion represents an annotation of a polymerase region.
+///
+/// \note Only \c cxTag, \c barcodeLeft, \c barcodeRight and \c score have
+///       default member initializers. \c type, \c beginPos and \c endPos have
+///       none, so a default-initialized object (<tt>VirtualRegion r;</tt>)
+///       leaves those three indeterminate until they are assigned.
+///
+struct VirtualRegion
+{
+    VirtualRegionType type;     ///< Kind of annotated region.
+    int beginPos;               ///< Begin position of the region.
+    int endPos;                 ///< End position of the region.
+    LocalContextFlags cxTag = LocalContextFlags::NO_LOCAL_CONTEXT; ///< Local context flags.
+    int barcodeLeft = -1;       ///< Left barcode; -1 unless set explicitly.
+    int barcodeRight = -1;      ///< Right barcode; -1 unless set explicitly.
+    int score = 0;              ///< Region score; 0 unless set explicitly.
+
+    /// \brief Creates a virtual region with basic type & position info.
+    ///
+    /// \param[in] type      region type
+    /// \param[in] beginPos  begin position
+    /// \param[in] endPos    end position
+    /// \param[in] score     region score (defaults to 0)
+    ///
+    VirtualRegion(const VirtualRegionType type,
+                  const int beginPos,
+                  const int endPos,
+                  const int score = 0);
+
+    /// \brief Creates a virtual region with type/position info, as well as
+    ///        context & barcode.
+    ///
+    /// \param[in] type          region type
+    /// \param[in] beginPos      begin position
+    /// \param[in] endPos        end position
+    /// \param[in] cxTag         local context flags
+    /// \param[in] barcodeLeft   left barcode
+    /// \param[in] barcodeRight  right barcode
+    /// \param[in] score         region score (defaults to 0)
+    ///
+    VirtualRegion(const VirtualRegionType type,
+                  const int beginPos,
+                  const int endPos,
+                  const LocalContextFlags cxTag,
+                  const int barcodeLeft,
+                  const int barcodeRight,
+                  const int score = 0);
+
+    VirtualRegion() = default;
+    VirtualRegion(const VirtualRegion&) = default;
+    VirtualRegion(VirtualRegion&&) = default;
+    VirtualRegion& operator=(const VirtualRegion&) = default; // un-"delete"-ed for SWIG
+    VirtualRegion& operator=(VirtualRegion&&) = default;
+    ~VirtualRegion() = default;
+
+    /// \returns true if \p v1 has the same type, begin and end position.
+    ///          Context flags, barcodes and score do not take part in the
+    ///          comparison.
+    bool operator==(const VirtualRegion &v1) const;
+};
+
+// Basic constructor: type, positions and (optionally) score.
+inline VirtualRegion::VirtualRegion(const VirtualRegionType type,
+                                    const int beginPos,
+                                    const int endPos,
+                                    const int score)
+    : type(type)
+    , beginPos(beginPos)
+    , endPos(endPos)
+    // value-initialized here, which takes precedence over the in-class default
+    , cxTag()
+    // barcodeLeft / barcodeRight are not listed, so they keep their
+    // in-class defaults (-1)
+    , score(score)
+{
+}
+
+// Full constructor: every data member is set from the matching argument,
+// so none of the in-class defaults apply.
+inline VirtualRegion::VirtualRegion(const VirtualRegionType type,
+                                    const int beginPos,
+                                    const int endPos,
+                                    const LocalContextFlags cxTag,
+                                    const int barcodeLeft,
+                                    const int barcodeRight,
+                                    const int score)
+    : type(type),
+      beginPos(beginPos),
+      endPos(endPos),
+      cxTag(cxTag),
+      barcodeLeft(barcodeLeft),
+      barcodeRight(barcodeRight),
+      score(score)
+{
+}
+
+// Two regions compare equal when type, begin and end position all match.
+// cxTag, barcodeLeft, barcodeRight and score are deliberately not part of
+// this comparison (they are never read here).
+inline bool VirtualRegion::operator==(const VirtualRegion& v1) const
+{
+    if (v1.type != type)
+        return false;
+    if (v1.beginPos != beginPos)
+        return false;
+    return v1.endPos == endPos;
+}
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // VIRTUALREGION_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualRegionType.h
+/// \brief Defines the VirtualRegionType enum.
+//
+// Author: Derek Barnett
+
+#ifndef REGIONTYPE_H
+#define REGIONTYPE_H
+
+#include "pbbam/Config.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief This enum defines the types of annotated region.
+///
+/// Each enumerator's value is the ASCII code of the one-letter code given
+/// in its description. Enumerators are listed in ascending value order.
+///
+enum class VirtualRegionType // : char
+{
+    ADAPTER  = 0x41, ///< Adapter region ('A')
+    BARCODE  = 0x42, ///< Barcode region ('B')
+    FILTERED = 0x46, ///< Filtered subread ('F')
+    HQREGION = 0x48, ///< High-quality region ('H')
+    LQREGION = 0x4C, ///< Low-quality region ('L'), i.e. outside the HQ region
+    SUBREAD  = 0x53  ///< Subread ('S')
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // REGIONTYPE_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualRegionTypeMap.h
+/// \brief Defines the VirtualRegionTypeMap class.
+//
+// Author: Derek Barnett
+
+#ifndef VIRTUALREGIONTYPEMAP_H
+#define VIRTUALREGIONTYPEMAP_H
+
+#include <map>
+
+#include "pbbam/Config.h"
+#include "pbbam/virtual/VirtualRegionType.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The VirtualRegionTypeMap class provides mapping between char codes and
+/// VirtualRegionType enum keys.
+///
+class VirtualRegionTypeMap
+{
+public:
+ /// \brief Lookup table from a region's char code to its VirtualRegionType.
+ ///
+ /// Only declared here; the definition (and the table's contents) are not
+ /// part of this header.
+ ///
+ /// NOTE(review): the map is non-const, so callers can modify it, and
+ /// operator[] inserts an entry for an unknown code - confirm this is intended.
+ static std::map<char, VirtualRegionType> ParseChar;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // VIRTUALREGIONTYPEMAP_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualZmwBamRecord.h
+/// \brief Defines the VirtualZmwBamRecord class.
+//
+// Author: Armin Töpfer
+
+#ifndef VirtualZmwBAMRECORD_H
+#define VirtualZmwBAMRECORD_H
+
+#include <vector>
+#include <sstream>
+
+#include "pbbam/BamHeader.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/Config.h"
+#include "pbbam/virtual/VirtualRegion.h"
+#include "pbbam/virtual/VirtualRegionType.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief The VirtualZmwBamRecord class represents a ZMW read stitched
+///        on-the-fly from subreads|hqregion + scraps.
+///
+class VirtualZmwBamRecord : public BamRecord
+{
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Builds a "virtual" ZMW %BAM record by re-stitching the
+    ///        segments it is made of.
+    ///
+    /// \param[in] unorderedSources  source data (subreads, scraps, etc.),
+    ///                              taken by rvalue reference
+    /// \param[in] header            %BAM header to associate with the new record
+    ///
+    /// \throws std::runtime_error on failure to stitch virtual record
+    ///
+    VirtualZmwBamRecord(std::vector<BamRecord>&& unorderedSources,
+                        const BamHeader& header);
+
+    // No default construction; copy and move are compiler-generated.
+    VirtualZmwBamRecord() = delete;
+    VirtualZmwBamRecord(const VirtualZmwBamRecord&) = default;
+    VirtualZmwBamRecord(VirtualZmwBamRecord&&) = default;
+    VirtualZmwBamRecord& operator=(const VirtualZmwBamRecord&) = default;
+    VirtualZmwBamRecord& operator=(VirtualZmwBamRecord&&) = default;
+    virtual ~VirtualZmwBamRecord() = default;
+
+    /// \}
+
+public:
+    /// \name Virtual Record Attributes
+    /// \{
+
+    /// \returns true if requested VirtualRegionType has been annotated.
+    ///
+    bool HasVirtualRegionType(const VirtualRegionType regionType) const;
+
+    /// \param[in] orientation  requested orientation of the returned data
+    ///                         (defaults to Orientation::NATIVE)
+    /// \returns IPD frame data
+    ///
+    Frames IPDV1Frames(Orientation orientation = Orientation::NATIVE) const;
+
+    /// \brief Provides all annotations of the polymerase read as a map
+    ///        (type => regions).
+    ///
+    std::map<VirtualRegionType, std::vector<VirtualRegion>> VirtualRegionsMap() const;
+
+    /// \brief Provides annotations of the polymerase read for a given
+    ///        VirtualRegionType.
+    ///
+    /// \param[in] regionType  requested region type
+    /// \returns regions that match the requested type (empty vector if none found).
+    ///
+    std::vector<VirtualRegion> VirtualRegionsTable(const VirtualRegionType regionType) const;
+
+    /// \}
+
+private:
+    // Stitching step; defined outside this header.
+    void StitchSources();
+
+    std::vector<BamRecord> sources_;
+    std::map<VirtualRegionType, std::vector<VirtualRegion>> virtualRegionsMap_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // VirtualZmwBAMRECORD_H
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file WhitelistedZmwReadStitcher.h
+/// \brief Defines the WhitelistedZmwReadStitcher class.
+//
+// Author: Derek Barnett
+
+#ifndef WHITELISTEDZMWREADSTITCHER_H
+#define WHITELISTEDZMWREADSTITCHER_H
+
+#include "pbbam/Config.h"
+#include "pbbam/virtual/VirtualZmwBamRecord.h"
+#include <memory>
+#include <vector>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+// Forward declarations only; the class declared below in this header does
+// not reference either type in its interface.
+class DataSet;
+class PbiFilter;
+
+/// \brief The WhitelistedZmwReadStitcher class provides an interface for
+///        re-stitching "virtual" ZMW reads from their constituent parts,
+///        limiting results to only those reads originating from a 'whitelist'
+///        of ZMW hole numbers.
+///
+/// Whitelisted ZMWs that are not present in both primary and scraps BAMs
+/// will be "pre-removed." This ensures that, given client code like this:
+///
+/// \include code/WhitelistedZmwReadStitcher.txt
+///
+/// each iteration will always provide valid data - either a valid virtual
+/// record from Next() or a non-empty vector from NextRaw().
+///
+/// \note This reader requires that both input %BAM files also have associated
+///       PBI files available for query. See BamFile::EnsurePacBioIndexExists .
+///
+class PBBAM_EXPORT WhitelistedZmwReadStitcher
+{
+public:
+    /// \name Constructors & Related Methods
+    /// \{
+
+    /// \brief Creates a reader that will operate on a primary %BAM file
+    ///        (e.g. subread data) and a scraps file, using a ZMW whitelist
+    ///        to filter the input.
+    ///
+    /// \param[in] zmwWhitelist        list of ZMWs to restrict iteration over
+    /// \param[in] primaryBamFilePath  hqregion.bam or subreads.bam file path
+    /// \param[in] scrapsBamFilePath   scraps.bam file path
+    ///
+    /// \note This reader requires that both input %BAM files also have
+    ///       associated PBI files available for query.
+    ///       See BamFile::EnsurePacBioIndexExists .
+    ///
+    /// \throws std::runtime_error if any files (*.bam and/or *.pbi) were not
+    ///         available for reading, or if malformed data encountered
+    ///
+    WhitelistedZmwReadStitcher(const std::vector<int32_t>& zmwWhitelist,
+                               const std::string& primaryBamFilePath,
+                               const std::string& scrapsBamFilePath);
+
+    // Neither default-constructible, copyable nor movable.
+    WhitelistedZmwReadStitcher() = delete;
+    WhitelistedZmwReadStitcher(const WhitelistedZmwReadStitcher&) = delete;
+    WhitelistedZmwReadStitcher(WhitelistedZmwReadStitcher&&) = delete;
+    WhitelistedZmwReadStitcher& operator=(const WhitelistedZmwReadStitcher&) = delete;
+    WhitelistedZmwReadStitcher& operator=(WhitelistedZmwReadStitcher&&) = delete;
+
+    // Declared here and defined elsewhere; the private implementation
+    // struct is incomplete in this header.
+    ~WhitelistedZmwReadStitcher();
+
+    /// \}
+
+public:
+    /// \name Stitched Record Reading
+    /// \{
+
+    /// \returns true if more ZMWs are available for reading.
+    bool HasNext() const;
+
+    /// \returns the re-stitched polymerase read from the next ZMW in the whitelist
+    VirtualZmwBamRecord Next();
+
+    /// \returns the set of reads that belong to the next ZMW in the whitelist.
+    ///          This enables stitching records in a distinct thread.
+    ///
+    std::vector<BamRecord> NextRaw();
+
+    /// \}
+
+public:
+    /// \name File Headers
+    /// \{
+
+    /// \returns the BamHeader associated with this reader's "primary" %BAM file
+    BamHeader PrimaryHeader() const;
+
+    /// \returns the BamHeader associated with this reader's "scraps" %BAM file
+    BamHeader ScrapsHeader() const;
+
+    /// \}
+
+private:
+    // Opaque implementation object, owned exclusively by this reader.
+    struct WhitelistedZmwReadStitcherPrivate;
+    std::unique_ptr<WhitelistedZmwReadStitcherPrivate> d_;
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // WHITELISTEDZMWREADSTITCHER
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwReadStitcher.h
+/// \brief Defines the ZmwReadStitcher class.
+//
+// Author: Derek Barnett
+
+#ifndef ZMWREADSTITCHER_H
+#define ZMWREADSTITCHER_H
+
+#include "pbbam/BamRecord.h"
+#include "pbbam/Config.h"
+#include "pbbam/virtual/VirtualZmwBamRecord.h"
+#include <memory>
+#include <vector>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+
+class DataSet;
+class PbiFilter;
+
+/// \brief The ZmwReadStitcher class provides an interface for re-stitching
+/// "virtual" polymerase reads from their constituent parts.
+///
+/// \note This reader requires that any input %BAM files also have associated PBI
+/// files available for query. See BamFile::EnsurePacBioIndexExists .
+///
+class PBBAM_EXPORT ZmwReadStitcher
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+
+ /// \brief Creates a stitcher over the entire contents of a primary %BAM file and a scraps %BAM file, both given by file path.
+ ZmwReadStitcher(const std::string& primaryBamFilePath,
+ const std::string& scrapsBamFilePath);
+
+ /// \brief Creates a stitcher over a primary/scraps %BAM file pair (both given by file path), with the input restricted by \p filter.
+ ZmwReadStitcher(const std::string& primaryBamFilePath,
+ const std::string& scrapsBamFilePath,
+ const PbiFilter& filter);
+
+ /// \brief Creates a stitcher from DataSet input; the input may or may not be filtered.
+ ZmwReadStitcher(const DataSet& dataset);
+
+ ZmwReadStitcher(void) = delete;
+ ZmwReadStitcher(const ZmwReadStitcher&) = delete;
+ ZmwReadStitcher(ZmwReadStitcher&&) = delete;
+ ZmwReadStitcher& operator=(const ZmwReadStitcher&) = delete;
+ ZmwReadStitcher& operator=(ZmwReadStitcher&&) = delete;
+ ~ZmwReadStitcher(void);
+
+ /// \}
+
+public:
+ /// \name File Headers
+ /// \{
+
+ /// \returns the BamHeader associated with this reader's "primary" %BAM file
+ BamHeader PrimaryHeader(void) const;
+
+ /// \returns the BamHeader associated with this reader's "scraps" %BAM file
+ BamHeader ScrapsHeader(void) const;
+
+ /// \}
+
+public:
+ /// \name Stitched Record Reading
+ /// \{
+
+ /// \returns true if more ZMWs are available for reading.
+ bool HasNext(void);
+
+ /// \returns the next stitched polymerase read
+ VirtualZmwBamRecord Next(void);
+
+ /// \returns the next set of reads that belong to one ZMW.
+ /// This enables stitching records in a distinct thread.
+ ///
+ std::vector<BamRecord> NextRaw(void);
+
+ /// \}
+
+private:
+ struct ZmwReadStitcherPrivate; // only forward-declared here; defined outside this header
+ std::unique_ptr<ZmwReadStitcherPrivate> d_; // sole owner of the private implementation object
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // ZMWREADSTITCHER_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwWhitelistVirtualReader.h
+/// \brief Defines the ZmwWhitelistVirtualReader class.
+//
+// Author: Derek Barnett
+
+#ifndef ZMWWHITELISTVIRTUALREADER_H
+#define ZMWWHITELISTVIRTUALREADER_H
+
+#include "pbbam/virtual/WhitelistedZmwReadStitcher.h"
+
+namespace PacBio {
+namespace BAM {
+
+/// \deprecated Use WhitelistedZmwReadStitcher instead; this alias only maps
+///             the ZmwWhitelistVirtualReader name onto that class.
+using ZmwWhitelistVirtualReader = WhitelistedZmwReadStitcher;
+
+} // namespace BAM
+} // namespace PacBio
+
+#endif // ZMWWHITELISTVIRTUALREADER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Accuracy.cpp
+/// \brief Implements the Accuracy class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Accuracy.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+const float Accuracy::MIN = 0.0f; // out-of-class definition of the static member; presumably the lowest valid accuracy (confirm in Accuracy.h)
+const float Accuracy::MAX = 1.0f; // out-of-class definition of the static member; presumably the highest valid accuracy (confirm in Accuracy.h)
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file AlignmentPrinter.cpp
+/// \brief Implements the AlignmentPrinter class.
+//
+// Author: Armin Töpfer
+
+#include "pbbam/AlignmentPrinter.h"
+
+#include <cmath>
+#include <iostream>
+#include <iomanip>
+#include <stdexcept>
+#include <sstream>
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+/// \brief Creates a printer that works on its own heap-allocated copy of \p ifr.
+///
+/// \param[in] ifr  reference reader to copy; the copy is later used to fetch
+///                 the reference bases for each printed record
+///
+AlignmentPrinter::AlignmentPrinter(const IndexedFastaReader& ifr)
+    : ifr_(new IndexedFastaReader(ifr))
+{}
+
+/// \brief Renders the alignment of \p record against its reference as
+///        human-readable text (with ANSI color escapes).
+///
+/// The output starts with a summary (read name, reference name, read length,
+/// concordance) followed by the alignment in rows of at most 40 columns. Each
+/// row consists of three lines - reference, match markers, read - where the
+/// reference and read lines are framed by their start and end coordinates.
+/// Marker line: '|' for a match (highlighted when the reference coordinate is
+/// a non-zero multiple of 10), ' ' for a gap column, '.' for a mismatch.
+/// Mismatching read bases are highlighted as well.
+///
+/// Both the fetched read sequence and reference subsequence are expected to
+/// carry '-' in gap columns and must have the same length.
+///
+/// \param[in] record       record to print
+/// \param[in] orientation  orientation passed on when fetching the read and
+///                         reference sequences
+///
+/// \returns the formatted text
+///
+/// \throws std::runtime_error if read and reference sequences differ in length
+///
+std::string AlignmentPrinter::Print(const BamRecord& record,
+                                    const Orientation orientation)
+{
+    const std::string seq = record.Sequence(orientation, true, true);
+    const std::string ref = ifr_->ReferenceSubsequence(record, orientation, true, true);
+
+    if (seq.size() != ref.size())
+        throw std::runtime_error("Sequence and reference parts are of different size");
+
+    // ANSI escapes: switch bold + red foreground on, then reset to defaults
+    const char* const highlightOn  = "\033[1m\x1b[31m";
+    const char* const highlightOff = "\033[0m\x1b[39;49m";
+    const size_t columnsPerRow = 40;
+
+    // left-pads the shorter of two coordinate labels with spaces so that both
+    // have the same width; returns that width
+    const auto padToCommonWidth = [](std::string& first, std::string& second) -> size_t
+    {
+        const size_t width = std::max(first.size(), second.size());
+        first  = std::string(width - first.size(), ' ') + first;
+        second = std::string(width - second.size(), ' ') + second;
+        return width;
+    };
+
+    int seqLength = 0;
+    // integral counter: a float accumulator stops incrementing at 2^24
+    size_t matchCount = 0;
+    std::string pretty;
+    Position refCoord = record.ReferenceStart();
+    Position seqCoord = record.QueryStart();
+
+    // 'i' is advanced by the inner loop, one alignment column at a time
+    for (size_t i = 0; i < seq.size();)
+    {
+        // row prefix: start coordinates of this row
+        std::string refCoordStr = std::to_string(refCoord);
+        std::string seqCoordStr = std::to_string(seqCoord);
+        const size_t labelWidth = padToCommonWidth(refCoordStr, seqCoordStr);
+
+        std::string seqWrap = seqCoordStr + " : ";
+        std::string refWrap = refCoordStr + " : ";
+        std::string prettyWrap(labelWidth + 3, ' ');
+
+        for (size_t column = 0; i < seq.size() && column < columnsPerRow; ++i, ++column)
+        {
+            const char seqBase = seq[i];
+            const char refBase = ref[i];
+
+            refWrap += refBase;
+
+            if (seqBase == refBase)
+            {
+                ++matchCount;
+                if (refCoord == 0 || refCoord % 10)
+                    prettyWrap += '|';
+                else
+                {
+                    // every 10th reference position gets a highlighted marker
+                    prettyWrap += highlightOn;
+                    prettyWrap += '|';
+                    prettyWrap += highlightOff;
+                }
+                seqWrap += seqBase;
+            }
+            else if (seqBase == '-' || refBase == '-')
+            {
+                // gap column
+                prettyWrap += ' ';
+                seqWrap += seqBase;
+            }
+            else
+            {
+                // mismatch: highlight the read base
+                prettyWrap += '.';
+                seqWrap += highlightOn;
+                seqWrap += seqBase;
+                seqWrap += highlightOff;
+            }
+
+            // gaps do not consume a coordinate on their own strand
+            if (seqBase != '-')
+            {
+                ++seqLength;
+                ++seqCoord;
+            }
+            if (refBase != '-')
+                ++refCoord;
+        }
+
+        // row suffix: coordinates reached at the end of this row
+        refCoordStr = std::to_string(refCoord);
+        seqCoordStr = std::to_string(seqCoord);
+        padToCommonWidth(refCoordStr, seqCoordStr);
+
+        seqWrap += " : " + seqCoordStr;
+        refWrap += " : " + refCoordStr;
+
+        pretty += refWrap + '\n' + prettyWrap + '\n' + seqWrap + "\n\n";
+    }
+
+    // fraction of alignment columns that match; an empty alignment reports 0
+    // instead of dividing by zero
+    const float similarity = seq.empty()
+        ? 0.0f
+        : static_cast<float>(matchCount) / static_cast<float>(seq.size());
+
+    std::stringstream output;
+    output << "Read        : " << record.FullName() << '\n';
+    output << "Reference   : " << record.ReferenceName() << '\n';
+    output << '\n';
+    output << "Read-length : " << seqLength << '\n';
+    output << "Concordance : " << std::setprecision(3) << similarity;
+    output << '\n';
+    output << '\n';
+    output << pretty;
+
+    return output.str();
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "AssertUtils.h"
+#include <cstdarg>
+#include <cstdio>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Writes a printf-style message (format + already-started argument list) to
+// 'stream', appends a newline and flushes the stream.
+//
+// The message is formatted straight into the stream, so its length is not
+// limited by any intermediate buffer. A null 'format' yields only the newline.
+static inline
+void message_out(FILE* stream,
+                 const char* format,
+                 va_list args)
+{
+    if (format)
+        vfprintf(stream, format, args);
+    fputc('\n', stream);
+    fflush(stream);
+}
+
+// printf-style informational message, written to stdout with a trailing newline.
+void printInfo(const char* msg, ...) {
+
+    va_list ap;
+    va_start(ap, msg);
+    message_out(stdout, msg, ap);
+    va_end(ap);
+}
+
+// printf-style error message, written to stderr with a trailing newline.
+void printError(const char* msg, ...) {
+
+    va_list ap;
+    va_start(ap, msg);
+    message_out(stderr, msg, ap);
+    va_end(ap);
+}
+
+// Reports a failed assertion on stderr, prefixed with "ASSERT FAILED: ".
+void printFailedAssert(const char* msg) {
+    // passing a null pointer to "%s" is undefined behavior, so substitute a marker
+    printError("ASSERT FAILED: %s", msg ? msg : "(null)");
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef ASSERTUTILS_H
+#define ASSERTUTILS_H
+
+// ---------------------------------------------------
+// This file contains dev/debugging helper utilities
+// ---------------------------------------------------
+
+#ifndef PBBAM_UNUSED // only defined here when no earlier definition exists
+# define PBBAM_UNUSED(x) (void)x;
+#endif // NOTE: the expansion already ends in ';' and 'x' is not parenthesized - pass a plain identifier
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+inline void pbbam_noop(void) { } // does nothing; used as the 'action' of PB_ASSERT_UNREACHABLE
+
+// printf-style helpers (format string + variadic arguments); a newline is appended automatically
+void printError(const char* msg, ...); // formats 'msg' and writes it to stderr
+void printInfo(const char* msg, ...); // formats 'msg' and writes it to stdout
+void printFailedAssert(const char* msg); // reports 'msg' via printError, prefixed with "ASSERT FAILED: "
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+//
+// The assert constructs below report a failed condition and then let the caller
+// take a fallback action (return, break, continue, etc.) instead of crashing at
+// runtime. In other words, they are 'weak' asserts with customized information &
+// failure response. Each one expands to an if/else followed by 'do {} while (0)'.
+//
+// PB_VERIFY(cond) if condition fails, print message
+// PB_ASSERT(cond, action) if condition fails, print message & perform action
+// PB_ASSERT_OR_BREAK(cond) shorthand for PB_ASSERT where action is 'break'
+// PB_ASSERT_OR_CONTINUE(cond) shorthand for PB_ASSERT where action is 'continue'
+// PB_ASSERT_OR_RETURN(cond) shorthand for PB_ASSERT where action is 'return'
+// PB_ASSERT_OR_RETURN_VALUE(cond, value) shorthand for PB_ASSERT where action is 'return <value>'
+// PB_ASSERT_UNREACHABLE shorthand for PB_ASSERT(false, <no-op>). Used as a visual marker for
+// unreachable code-paths (e.g. invalid values in a switch statement)
+//
+#define PB_ASSERT_STRINGIFY2(x) #x
+#define PB_ASSERT_STRINGIFY(x) PB_ASSERT_STRINGIFY2(x)
+#define PB_ASSERT_STRING(cond) ::PacBio::BAM::internal::printFailedAssert( \
+ "\"" cond"\" in file " __FILE__ ", line " PB_ASSERT_STRINGIFY(__LINE__))
+
+#define PB_VERIFY(cond) if (cond) {} else { PB_ASSERT_STRING(#cond); } do {} while (0)
+#define PB_ASSERT(cond, action) if (cond) {} else { PB_ASSERT_STRING(#cond); action; } do {} while (0)
+#define PB_ASSERT_OR_BREAK(cond) PB_ASSERT(cond, break)
+#define PB_ASSERT_OR_CONTINUE(cond) PB_ASSERT(cond, continue)
+#define PB_ASSERT_OR_RETURN(cond) PB_ASSERT(cond, return)
+#define PB_ASSERT_OR_RETURN_VALUE(cond, value) PB_ASSERT(cond, return value)
+
+#define PB_ASSERT_UNREACHABLE PB_ASSERT(false, ::PacBio::BAM::internal::pbbam_noop())
+
+#endif // ASSERTUTILS_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BaiIndexedBamReader.cpp
+/// \brief Implements the BaiIndexedBamReader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BaiIndexedBamReader.h"
+#include "MemoryUtils.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Private implementation data for BaiIndexedBamReader: the loaded BAI index,
+// the htslib iterator over the currently requested region, and that region.
+struct BaiIndexedBamReaderPrivate
+{
+public:
+    // Loads the index for 'file' and positions the iterator on 'interval'.
+    // Throws std::runtime_error if the index cannot be loaded or no iterator
+    // can be created for the interval.
+    BaiIndexedBamReaderPrivate(const BamFile& file,
+                               const GenomicInterval& interval)
+        : htsIndex_(nullptr)
+        , htsIterator_(nullptr)
+    {
+        LoadIndex(file.Filename());
+        Interval(file.Header(), interval);
+    }
+
+    // Re-targets the reader at 'interval', resolving its name via 'header'.
+    //
+    // Any existing iterator is dropped first. The stored interval is only
+    // updated once the new iterator exists, so after a failure interval_
+    // still holds the last successfully applied interval.
+    //
+    // Throws std::runtime_error if the interval's name is unknown to the
+    // header, its id is out of range, or htslib yields no iterator.
+    void Interval(const BamHeader& header,
+                  const GenomicInterval& interval)
+    {
+        htsIterator_.reset(nullptr);
+
+        if (header.HasSequence(interval.Name())) {
+            const auto id = header.SequenceId(interval.Name());
+            if (id >= 0 && static_cast<size_t>(id) < header.NumSequences()) {
+                htsIterator_.reset(bam_itr_queryi(htsIndex_.get(),
+                                                  id,
+                                                  interval.Start(),
+                                                  interval.Stop()));
+            }
+        }
+
+        if (!htsIterator_)
+            throw std::runtime_error("could not create iterator for requested region");
+
+        // remember the active region; BaiIndexedBamReader::Interval() reads it back
+        interval_ = interval;
+    }
+
+    // Loads the index that belongs to BAM file 'fn'.
+    // Throws std::runtime_error if no index data could be loaded.
+    void LoadIndex(const string& fn)
+    {
+        htsIndex_.reset(bam_index_load(fn.c_str()));
+        if (!htsIndex_)
+            throw std::runtime_error("could not load BAI index data");
+    }
+
+    // Fetches the next record of the current region into 'b'.
+    // Returns the result of hts_itr_next() unchanged.
+    int ReadRawData(BGZF* bgzf, bam1_t* b)
+    {
+        assert(htsIterator_.get());
+        return hts_itr_next(bgzf, htsIterator_.get(), b, nullptr);
+    }
+
+public:
+    GenomicInterval interval_;   // region the iterator was last successfully created for
+    std::unique_ptr<hts_idx_t, internal::HtslibIndexDeleter> htsIndex_;
+    std::unique_ptr<hts_itr_t, internal::HtslibIteratorDeleter> htsIterator_;
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+/// Opens \p filename as a BamFile and delegates to the BamFile-based constructor.
+BaiIndexedBamReader::BaiIndexedBamReader(const GenomicInterval& interval, const std::string& filename)
+    : BaiIndexedBamReader(interval, BamFile(filename))
+{}
+
+/// Reads from a copy of \p bamFile; the private data (index + iterator for
+/// \p interval) is built from the file held by the BamReader base.
+BaiIndexedBamReader::BaiIndexedBamReader(const GenomicInterval& interval, const BamFile& bamFile)
+    : BamReader(bamFile)
+    , d_(new BaiIndexedBamReaderPrivate(File(), interval))
+{}
+
+/// Same as the copying overload, but moves \p bamFile into the BamReader base.
+BaiIndexedBamReader::BaiIndexedBamReader(const GenomicInterval& interval, BamFile&& bamFile)
+    : BamReader(std::move(bamFile))
+    , d_(new BaiIndexedBamReaderPrivate(File(), interval))
+{}
+
+/// \returns a reference to the interval held in this reader's private data
+const GenomicInterval& BaiIndexedBamReader::Interval(void) const
+{
+    assert(d_);
+    const auto& impl = *d_;
+    return impl.interval_;
+}
+
+/// Forwards the raw record read to the private implementation.
+///
+/// \returns the status code produced by the private implementation's read
+int BaiIndexedBamReader::ReadRawData(BGZF* bgzf, bam1_t* b)
+{
+    assert(d_);
+    const int status = d_->ReadRawData(bgzf, b);
+    return status;
+}
+
+/// Points this reader at a new genomic interval, resolved against the
+/// reader's own header.
+///
+/// \returns reference to this reader
+BaiIndexedBamReader& BaiIndexedBamReader::Interval(const GenomicInterval& interval)
+{
+    assert(d_);
+    auto& impl = *d_;
+    impl.Interval(Header(), interval);
+    return *this;
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamFile.cpp
+/// \brief Implements the BamFile class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamFile.h"
+#include "pbbam/PbiFile.h"
+#include "FileUtils.h"
+#include "MemoryUtils.h"
+#include <htslib/sam.h>
+#include <memory>
+#include <sstream>
+#include <cassert>
+#include <sys/stat.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Private implementation data for BamFile: the file name, the parsed header
+// and the virtual offset of the first alignment record.
+class BamFilePrivate
+{
+public:
+    // Opens 'fn', optionally checks its EOF block, parses the header and
+    // caches the offset at which alignment records begin.
+    // Throws std::runtime_error on open/format/EOF-check failures.
+    BamFilePrivate(const string& fn)
+        : filename_(fn)
+        , firstAlignmentOffset_(-1)
+    {
+        // keep htslib's verbosity in step with the requested level (-1 maps to 0)
+        if (PacBio::BAM::HtslibVerbosity == -1)
+            hts_verbose = 0;
+        else
+            hts_verbose = PacBio::BAM::HtslibVerbosity;
+
+        // open the file (throws on failure)
+        const auto bamHandle = RawOpen();
+
+#if !defined (PBBAM_NO_CHECK_EOF) || PBBAM_AUTOVALIDATE
+        // sanity check on file; possible results:
+        //    1: EOF present & correct
+        //    2: not seekable (e.g. reading from stdin)
+        //    0: EOF absent
+        //   -1: some other error
+        const int eofStatus = bgzf_check_EOF(bamHandle->fp.bgzf);
+        if (eofStatus <= 0) {
+            const char* const reason = (eofStatus == 0)
+                ? " : is missing EOF block"
+                : " : unknown error while checking EOF block";
+            stringstream e;
+            e << fn << reason << endl;
+            throw std::runtime_error(e.str());
+        }
+#endif
+
+        // fetch & convert the header
+        const std::unique_ptr<bam_hdr_t, internal::HtslibHeaderDeleter> rawHeader(sam_hdr_read(bamHandle.get()));
+        header_ = internal::BamHeaderMemory::FromRawData(rawHeader.get());
+
+        // the header has just been consumed, so the current position is
+        // where the first alignment starts
+        firstAlignmentOffset_ = bgzf_tell(bamHandle->fp.bgzf);
+    }
+
+    // Builds an independent copy by re-running the constructor on the same file name.
+    unique_ptr<BamFilePrivate> DeepCopy(void)
+    {
+        unique_ptr<BamFilePrivate> copy(new BamFilePrivate(filename_));
+        return copy;
+    }
+
+    // Returns true only if the file can be opened and its EOF check reports 1.
+    bool HasEOF(void) const
+    {
+        // streamed input is unknown, since it's not random-accessible
+        if (filename_ == "-")
+            return false;
+
+        return RawEOFCheck(RawOpen()) == 1;
+    }
+
+    // Returns the raw result of bgzf_check_EOF() for an already opened file.
+    int RawEOFCheck(const std::unique_ptr<samFile, internal::HtslibFileDeleter>& f) const
+    {
+        assert(f);
+        assert(f->fp.bgzf);
+        return bgzf_check_EOF(f->fp.bgzf);
+    }
+
+    // Opens the file for reading and verifies that it is a BAM file.
+    // Throws std::runtime_error if it cannot be opened or is not BAM.
+    std::unique_ptr<samFile, internal::HtslibFileDeleter> RawOpen(void) const
+    {
+        std::unique_ptr<samFile, internal::HtslibFileDeleter> handle(sam_open(filename_.c_str(), "rb"));
+
+        const bool opened = handle && handle->fp.bgzf;
+        if (!opened)
+            throw std::runtime_error(string("could not open BAM file: ") + filename_);
+
+        if (handle->format.format != bam)
+            throw std::runtime_error("expected BAM, unknown format");
+
+        return handle;
+    }
+
+public:
+    std::string filename_;
+    BamHeader header_;
+    int64_t firstAlignmentOffset_;
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// ------------------------
+// BamFile implementation
+// ------------------------
+
+/// Opens \p filename and loads its header (see internal::BamFilePrivate).
+BamFile::BamFile(const std::string& filename)
+    : d_(new internal::BamFilePrivate(filename))
+{ }
+
+/// Copies \p other by deep-copying its private data.
+BamFile::BamFile(const BamFile& other)
+    : d_(other.d_->DeepCopy())
+{ }
+
+/// Takes over the private data of \p other.
+BamFile::BamFile(BamFile&& other)
+    : d_(std::move(other.d_))
+{ }
+
+/// Replaces this file's private data with a deep copy of \p other's.
+BamFile& BamFile::operator=(const BamFile& other)
+{
+    // a deep copy re-opens the file, so skip that work (and the chance of it
+    // throwing) when assigning an object to itself
+    if (this != &other)
+        d_ = other.d_->DeepCopy();
+    return *this;
+}
+
+/// Takes over the private data of \p other.
+BamFile& BamFile::operator=(BamFile&& other)
+{
+    d_ = std::move(other.d_);
+    return *this;
+}
+
+BamFile::~BamFile(void) { }
+
+/// Creates a PBI index for this file via PbiFile::CreateFrom.
+void BamFile::CreatePacBioIndex(void) const
+{
+    PbiFile::CreateFrom(*this);
+}
+
+/// Builds a BAI index for this file with htslib.
+///
+/// \throws std::runtime_error if htslib reports a non-zero status
+void BamFile::CreateStandardIndex(void) const
+{
+    const int status = bam_index_build(d_->filename_.c_str(), 0);
+    if (status != 0)
+        throw std::runtime_error("could not build BAI index");
+}
+
+/// Creates the PBI index only if its file does not exist yet.
+void BamFile::EnsurePacBioIndexExists(void) const
+{
+    if (PacBioIndexExists())
+        return;
+    CreatePacBioIndex();
+}
+
+/// Creates the BAI index only if its file does not exist yet.
+void BamFile::EnsureStandardIndexExists(void) const
+{
+    if (StandardIndexExists())
+        return;
+    CreateStandardIndex();
+}
+
+/// \returns the file name this object was created with
+std::string BamFile::Filename(void) const
+{
+    return d_->filename_;
+}
+
+/// \returns the offset cached right after the header was read
+int64_t BamFile::FirstAlignmentOffset(void) const
+{
+    return d_->firstAlignmentOffset_;
+}
+
+/// \returns the result of the private implementation's EOF check
+bool BamFile::HasEOF(void) const
+{
+    return d_->HasEOF();
+}
+
+/// \returns true if the header has a sequence called \p name
+bool BamFile::HasReference(const std::string& name) const
+{
+    const BamHeader& header = d_->header_;
+    return header.HasSequence(name);
+}
+
+/// \returns the header loaded when the file was opened
+const BamHeader& BamFile::Header(void) const
+{
+    return d_->header_;
+}
+
+/// \returns true if the header carries a non-empty PacBio BAM version
+bool BamFile::IsPacBioBAM(void) const
+{
+    const BamHeader& header = d_->header_;
+    return !header.PacBioBamVersion().empty();
+}
+
+/// \returns true if the file named by PacBioIndexFilename() exists
+bool BamFile::PacBioIndexExists(void) const
+{
+    const std::string pbiFilename = PacBioIndexFilename();
+    return internal::FileUtils::Exists(pbiFilename);
+}
+
+/// \returns the BAM file name with ".pbi" appended
+std::string BamFile::PacBioIndexFilename(void) const
+{
+    return d_->filename_ + ".pbi";
+}
+
+/// \returns true if the PBI file's last-modified timestamp is not older than
+///          the BAM file's (equal timestamps count as "newer")
+bool BamFile::PacBioIndexIsNewer(void) const
+{
+    // query the BAM first, then the index - same order as the file names are built
+    const auto bamModified = internal::FileUtils::LastModified(Filename());
+    const auto indexModified = internal::FileUtils::LastModified(PacBioIndexFilename());
+    return indexModified >= bamModified;
+}
+
+/// \returns the header's sequence id for \p name
+int BamFile::ReferenceId(const std::string& name) const
+{
+    const BamHeader& header = d_->header_;
+    return header.SequenceId(name);
+}
+
+/// \returns the length of the reference called \p name (looked up by its id)
+uint32_t BamFile::ReferenceLength(const std::string& name) const
+{
+    const int id = ReferenceId(name);
+    return ReferenceLength(id);
+}
+
+/// \returns the header's sequence length for \p id, parsed with std::stoul
+uint32_t BamFile::ReferenceLength(const int id) const
+{
+    const BamHeader& header = d_->header_;
+    return std::stoul(header.SequenceLength(id));
+}
+
+/// \returns the header's sequence name for \p id
+std::string BamFile::ReferenceName(const int id) const
+{
+    const BamHeader& header = d_->header_;
+    return header.SequenceName(id);
+}
+
+/// \returns true if the file named by StandardIndexFilename() exists
+bool BamFile::StandardIndexExists(void) const
+{
+    const std::string baiFilename = StandardIndexFilename();
+    return internal::FileUtils::Exists(baiFilename);
+}
+
+/// \returns the BAM file name with ".bai" appended
+std::string BamFile::StandardIndexFilename(void) const
+{
+    return d_->filename_ + ".bai";
+}
+
+// Compares last-modified timestamps of the BAM file and its standard index.
+// Returns true when the index is at least as recent as the BAM file
+// (equal timestamps count as "newer").
+bool BamFile::StandardIndexIsNewer(void) const
+{
+    const std::string bamFn = Filename();
+    const std::string baiFn = StandardIndexFilename();
+    const auto bamModified = internal::FileUtils::LastModified(bamFn);
+    const auto baiModified = internal::FileUtils::LastModified(baiFn);
+    return bamModified <= baiModified;
+}
+
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamHeader.cpp
+/// \brief Implements the BamHeader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamHeader.h"
+#include "StringUtils.h"
+#include "Version.h"
+#include <htslib/hts.h>
+#include <sstream>
+#include <set>
+#include <cassert>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// SAM header line prefixes
+static const string prefix_HD{"@HD"};
+static const string prefix_SQ{"@SQ"};
+static const string prefix_RG{"@RG"};
+static const string prefix_PG{"@PG"};
+static const string prefix_CO{"@CO"};
+
+// tag names used on the @HD line
+static const string token_VN{"VN"};
+static const string token_SO{"SO"};
+static const string token_pb{"pb"};
+
+// Sort orders are merge-compatible only when the two strings are identical.
+static inline
+bool CheckSortOrder(const string& lhs, const string& rhs)
+{
+    const bool mismatch = (lhs != rhs);
+    return !mismatch;
+}
+
+// Both PacBio BAM versions must be at least Version::Minimum.
+// The right-hand version is only parsed if the left-hand one passes.
+static inline
+bool CheckPbVersion(const string& lhs, const string& rhs)
+{
+    const bool lhsOk = (Version{ lhs } >= Version::Minimum);
+    if (!lhsOk)
+        return false;
+    return Version{ rhs } >= Version::Minimum;
+}
+
+// Sequence lists only have to match when the sort order is "coordinate";
+// for any other sort order the check passes unconditionally.
+static inline
+bool CheckSequences(const string& sortOrder,
+                    const vector<SequenceInfo>& lhs,
+                    const vector<SequenceInfo>& rhs)
+{
+    if (sortOrder != "coordinate")
+        return true;
+    return lhs == rhs;
+}
+
+// Verifies that two headers can be merged; throws std::runtime_error listing
+// every failed check (sort order, PacBio BAM version, sequence lists).
+static
+void EnsureCanMerge(const BamHeader& lhs, const BamHeader& rhs)
+{
+    // run all checks up front so that every failure can be reported at once
+    const bool sameSortOrder = CheckSortOrder(lhs.SortOrder(), rhs.SortOrder());
+    const bool versionsOk    = CheckPbVersion(lhs.PacBioBamVersion(), rhs.PacBioBamVersion());
+    const bool sameSequences = CheckSequences(lhs.SortOrder(), lhs.Sequences(), rhs.Sequences());
+
+    const bool anyFailure = !(sameSortOrder && versionsOk && sameSequences);
+    if (anyFailure) {
+
+        // format error message & throw
+        stringstream msg;
+        msg << "could not merge BAM headers:" << '\n';
+
+        if (!sameSortOrder) {
+            msg << " mismatched sort orders (@HD:SO) : ("
+                << lhs.SortOrder() << ", " << rhs.SortOrder()
+                << ")" << '\n';
+        }
+
+        if (!versionsOk) {
+            msg << " incompatible PacBio BAM versions (@HD:pb) : ("
+                << lhs.PacBioBamVersion() << ", " << rhs.PacBioBamVersion()
+                << ")" << '\n';
+        }
+
+        if (!sameSequences)
+            msg << " mismatched sequence lists (@SQ entries)" << '\n';
+
+        throw std::runtime_error(msg.str());
+    }
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// Builds a header by parsing SAM-formatted header text line by line.
+//
+// Recognized lines: @HD (VN, SO and pb tags), @SQ, @RG, @PG and @CO. Lines
+// shorter than 5 characters and lines with any other prefix are ignored.
+// If an @HD line carries no VN tag, the version falls back to hts_version().
+//
+// \param samHeaderText  header text, one SAM header record per line
+// \throws std::runtime_error (via PacBioBamVersion) if the pb tag names a
+//         version older than the minimum supported one
+BamHeader::BamHeader(const string& samHeaderText)
+    : d_(new internal::BamHeaderPrivate)
+{
+    istringstream s(samHeaderText);
+    string line;
+    while (getline(s, line)) {
+
+        // skip if line is not long enough to contain true values
+        if (line.length() < 5)
+            continue;
+
+        // determine token at beginning of line
+        const string firstToken = line.substr(0,3);
+
+        if (firstToken == internal::prefix_HD) {
+
+            // pop off '@HD\t', then split HD lines into tokens
+            const auto tokens = internal::Split(line.substr(4), '\t');
+            for (const string& token : tokens) {
+
+                // a usable token looks like "XX:value"; anything shorter than
+                // "XX:" (e.g. an empty field from a doubled tab) would make
+                // substr(3) throw std::out_of_range, so skip it
+                if (token.size() < 3)
+                    continue;
+
+                const string tokenTag = token.substr(0,2);
+                const string tokenValue = token.substr(3);
+
+                // set header contents
+                if (tokenTag == internal::token_VN) Version(tokenValue);
+                else if (tokenTag == internal::token_SO) SortOrder(tokenValue);
+                else if (tokenTag == internal::token_pb) PacBioBamVersion(tokenValue);
+            }
+
+            // check for required tags
+            if (Version().empty())
+                Version(string(hts_version()));
+        }
+
+        else if (firstToken == internal::prefix_SQ)
+            AddSequence(SequenceInfo::FromSam(line));
+
+        else if (firstToken == internal::prefix_RG)
+            AddReadGroup(ReadGroupInfo::FromSam(line));
+
+        else if (firstToken == internal::prefix_PG)
+            AddProgram(ProgramInfo::FromSam(line));
+
+        else if (firstToken == internal::prefix_CO)
+            AddComment(line.substr(4));
+    }
+}
+
+// Merges `other` into this header. Throws (via EnsureCanMerge) if the two
+// headers are incompatible. Read groups and programs are added only when
+// their ID is not already present; all comments from `other` are appended.
+BamHeader& BamHeader::operator+=(const BamHeader& other)
+{
+    internal::EnsureCanMerge(*this, other);
+
+    // read groups: keep ours on ID collision
+    for (const auto& readGroup : other.ReadGroups()) {
+        if (HasReadGroup(readGroup.Id()))
+            continue;
+        AddReadGroup(readGroup);
+    }
+
+    // programs: keep ours on ID collision
+    for (const auto& program : other.Programs()) {
+        if (HasProgram(program.Id()))
+            continue;
+        AddProgram(program);
+    }
+
+    // comments: always appended
+    for (const auto& comment : other.Comments())
+        AddComment(comment);
+
+    return *this;
+}
+
+// Appends `sequence` to the sequence list and records its index in the
+// name -> index lookup (an existing entry for the same name is overwritten).
+BamHeader& BamHeader::AddSequence(const SequenceInfo& sequence)
+{
+    auto& sequences = d_->sequences_;
+    sequences.push_back(sequence);
+    d_->sequenceIdLookup_[sequence.Name()] = sequences.size() - 1;
+    return *this;
+}
+
+// Removes all sequences together with their name -> index lookup entries.
+BamHeader& BamHeader::ClearSequences(void)
+{
+    d_->sequences_.clear();
+    d_->sequenceIdLookup_.clear();
+    return *this;
+}
+
+// Returns a new header whose private data is copied field by field from
+// this one, rather than copying the d_ pointer itself.
+BamHeader BamHeader::DeepCopy(void) const
+{
+    BamHeader result;
+    auto& dst = *result.d_;
+    const auto& src = *d_;
+
+    // @HD fields
+    dst.version_          = src.version_;
+    dst.pacbioBamVersion_ = src.pacbioBamVersion_;
+    dst.sortOrder_        = src.sortOrder_;
+    dst.headerLineCustom_ = src.headerLineCustom_;
+
+    // @RG, @PG, @CO
+    dst.readGroups_ = src.readGroups_;
+    dst.programs_   = src.programs_;
+    dst.comments_   = src.comments_;
+
+    // @SQ entries and their lookup
+    dst.sequences_        = src.sequences_;
+    dst.sequenceIdLookup_ = src.sequenceIdLookup_;
+    return result;
+}
+
+// Sets the PacBio BAM version string (@HD:pb).
+//
+// \param version  version string; parsed with internal::Version
+// \throws std::runtime_error if the parsed version is older than
+//         internal::Version::Minimum
+//
+// NOTE: the string is stored before it is validated, so it remains set
+//       even when this method throws (original behavior, preserved).
+BamHeader& BamHeader::PacBioBamVersion(const std::string& version)
+{
+    d_->pacbioBamVersion_ = version;
+    const auto fileVersion = internal::Version{ version };
+    if (fileVersion < internal::Version::Minimum) {
+        // pieces are joined with explicit spaces so the message stays readable
+        auto msg = string{ "invalid PacBio BAM version number" };
+        msg += ( " (" + fileVersion.ToString() + ")");
+        msg += string{ " is older than the minimum supported version" };
+        msg += ( " (" + internal::Version::Minimum.ToString() + ")");
+        throw std::runtime_error(msg);
+    }
+    return *this;
+}
+
+// Returns the program entry stored under `id`.
+// Throws std::runtime_error if no such ID exists.
+ProgramInfo BamHeader::Program(const std::string& id) const
+{
+    const auto& programs = d_->programs_;
+    const auto found = programs.find(id);
+    if (found != programs.cend())
+        return found->second;
+    throw std::runtime_error("program ID not found");
+}
+
+// Returns the IDs (keys) of all stored program entries, in container order.
+vector<string> BamHeader::ProgramIds(void) const
+{
+    vector<string> ids;
+    ids.reserve(d_->programs_.size());
+    for (const auto& entry : d_->programs_)
+        ids.push_back(entry.first);
+    return ids;
+}
+
+// Returns copies of all stored program entries, in container order.
+vector<ProgramInfo> BamHeader::Programs(void) const
+{
+    vector<ProgramInfo> programs;
+    programs.reserve(d_->programs_.size());
+    for (const auto& entry : d_->programs_)
+        programs.push_back(entry.second);
+    return programs;
+}
+
+// Replaces all program entries with `programs`, keyed by each entry's Id().
+// If several entries share an ID, the last one wins.
+BamHeader& BamHeader::Programs(const vector<ProgramInfo>& programs)
+{
+    auto& lookup = d_->programs_;
+    lookup.clear();
+    for (const auto& program : programs)
+        lookup[program.Id()] = program;
+    return *this;
+}
+
+// Returns the read group stored under `id`.
+// Throws std::runtime_error if no such ID exists.
+ReadGroupInfo BamHeader::ReadGroup(const std::string& id) const
+{
+    const auto& readGroups = d_->readGroups_;
+    const auto found = readGroups.find(id);
+    if (found != readGroups.cend())
+        return found->second;
+    throw std::runtime_error("read group ID not found");
+}
+
+// Returns the IDs (keys) of all stored read groups, in container order.
+vector<string> BamHeader::ReadGroupIds(void) const
+{
+    vector<string> ids;
+    ids.reserve(d_->readGroups_.size());
+    for (const auto& entry : d_->readGroups_)
+        ids.push_back(entry.first);
+    return ids;
+}
+
+// Returns copies of all stored read groups, in container order.
+vector<ReadGroupInfo> BamHeader::ReadGroups(void) const
+{
+    vector<ReadGroupInfo> readGroups;
+    readGroups.reserve(d_->readGroups_.size());
+    for (const auto& entry : d_->readGroups_)
+        readGroups.push_back(entry.second);
+    return readGroups;
+}
+
+// Replaces all read groups with `readGroups`, keyed by each entry's Id().
+// If several entries share an ID, the last one wins.
+BamHeader& BamHeader::ReadGroups(const vector<ReadGroupInfo>& readGroups)
+{
+    auto& lookup = d_->readGroups_;
+    lookup.clear();
+    for (const auto& readGroup : readGroups)
+        lookup[readGroup.Id()] = readGroup;
+    return *this;
+}
+
+// Returns the sequence entry called `name`, or a default-constructed
+// SequenceInfo if the name is unknown.
+SequenceInfo BamHeader::Sequence(const std::string& name) const
+{
+    // TODO: SequenceId(name) throws if not found, should we do so here as well?
+
+    const auto found = d_->sequenceIdLookup_.find(name);
+    const bool isKnown = (found != d_->sequenceIdLookup_.cend());
+    if (!isKnown)
+        return SequenceInfo();
+
+    // the lookup stores the position of the entry within sequences_
+    const int index = found->second;
+    assert(index >= 0 && (size_t)index < d_->sequences_.size());
+    return d_->sequences_.at(index);
+}
+
+// Returns the index stored for the sequence called `name`.
+// Throws std::runtime_error if the name is unknown.
+int32_t BamHeader::SequenceId(const std::string& name) const
+{
+    const auto found = d_->sequenceIdLookup_.find(name);
+    if (found != d_->sequenceIdLookup_.cend())
+        return found->second;
+    throw std::runtime_error("sequence not found");
+}
+
+// Returns the names of all stored sequences, in stored order.
+vector<string> BamHeader::SequenceNames(void) const
+{
+    vector<string> names;
+    names.reserve(d_->sequences_.size());
+    for (const auto& sequence : d_->sequences_)
+        names.push_back(sequence.Name());
+    return names;
+}
+
+// Replaces all sequence entries with `sequences`.
+//
+// Both the sequence list and the name -> index lookup are cleared first;
+// clearing only the list would leave stale lookup entries behind, so that
+// SequenceId()/Sequence() could hand out indices of sequences that no
+// longer exist.
+//
+// \param sequences  new sequence entries, added in order via AddSequence()
+// \returns reference to this header
+BamHeader& BamHeader::Sequences(const vector<SequenceInfo>& sequences)
+{
+    d_->sequenceIdLookup_.clear();
+    d_->sequences_.clear();
+    for (const SequenceInfo& seq : sequences)
+        AddSequence(seq);
+    return *this;
+}
+
+// Serializes this header as SAM header text: one @HD line followed by the
+// @SQ, @RG, @PG and @CO lines, each terminated by a newline.
+string BamHeader::ToSam(void) const
+{
+    // @HD values, falling back to defaults for fields that were never set
+    string version = d_->version_;
+    if (version.empty())
+        version = string(hts_version());
+
+    string sortOrder = d_->sortOrder_;
+    if (sortOrder.empty())
+        sortOrder = string("unknown");
+
+    string pbVersion = d_->pacbioBamVersion_;
+    if (pbVersion.empty())
+        pbVersion = internal::Version::Current.ToString();
+
+    stringstream out;
+
+    // @HD
+    out << internal::prefix_HD
+        << internal::MakeSamTag(internal::token_VN, version)
+        << internal::MakeSamTag(internal::token_SO, sortOrder)
+        << internal::MakeSamTag(internal::token_pb, pbVersion)
+        << '\n';
+
+    // @SQ
+    for (const SequenceInfo& sequence : d_->sequences_)
+        out << sequence.ToSam() << '\n';
+
+    // @RG
+    for (const auto& entry : d_->readGroups_)
+        out << entry.second.ToSam() << '\n';
+
+    // @PG
+    for (const auto& entry : d_->programs_)
+        out << entry.second.ToSam() << '\n';
+
+    // @CO
+    for (const string& comment : d_->comments_)
+        out << internal::prefix_CO << '\t' << comment << '\n';
+
+    return out.str();
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamReader.cpp
+/// \brief Implements the BamReader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamReader.h"
+#include "pbbam/Validator.h"
+#include "MemoryUtils.h"
+#include <htslib/bgzf.h>
+#include <htslib/hfile.h>
+#include <htslib/hts.h>
+#include <cassert>
+#include <cstdio>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Private implementation for BamReader: holds the htslib file handle and the
+// BamFile it was opened from. Both constructors open the file immediately.
+struct BamReaderPrivate
+{
+public:
+    // Stores a copy of bamFile, then opens it.
+    BamReaderPrivate(const BamFile& bamFile)
+        : htsFile_(nullptr)
+        , bamFile_(bamFile)
+    {
+        DoOpen();
+    }
+
+    // Takes over bamFile by move, then opens it.
+    BamReaderPrivate(BamFile&& bamFile)
+        : htsFile_(nullptr)
+        , bamFile_(std::move(bamFile))
+    {
+        DoOpen();
+    }
+
+    // Opens bamFile_'s filename with sam_open() in "rb" mode.
+    // Throws std::runtime_error if no handle is obtained.
+    void DoOpen(void) {
+
+        // fetch file pointer
+        const std::string filename = bamFile_.Filename();
+        htsFile_.reset(sam_open(filename.c_str(), "rb"));
+        if (htsFile_)
+            return;
+        throw std::runtime_error("could not open BAM file for reading");
+    }
+
+public:
+    std::unique_ptr<samFile, internal::HtslibFileDeleter> htsFile_;
+    BamFile bamFile_;
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// Opens the BAM file named `fn` by delegating to the BamFile&& constructor.
+BamReader::BamReader(const string& fn) :
+    BamReader(BamFile(fn))
+{
+}
+
+// Opens `bamFile` (stored as a copy) and positions the reader on the first
+// alignment record.
+BamReader::BamReader(const BamFile& bamFile) :
+    d_(new internal::BamReaderPrivate(bamFile))
+{
+    // skip header
+    const int64_t firstRecordOffset = d_->bamFile_.FirstAlignmentOffset();
+    VirtualSeek(firstRecordOffset);
+}
+
+// Opens `bamFile` (taken over by move) and positions the reader on the first
+// alignment record.
+BamReader::BamReader(BamFile&& bamFile) :
+    d_(new internal::BamReaderPrivate(std::move(bamFile)))
+{
+    // skip header
+    const int64_t firstRecordOffset = d_->bamFile_.FirstAlignmentOffset();
+    VirtualSeek(firstRecordOffset);
+}
+
+// Nothing to do explicitly; members clean up after themselves.
+BamReader::~BamReader(void)
+{
+}
+
+// Returns the BGZF handle of the open htslib file (asserted non-null).
+BGZF* BamReader::Bgzf(void) const
+{
+    assert(d_);
+    assert(d_->htsFile_);
+    BGZF* const handle = d_->htsFile_->fp.bgzf;
+    assert(handle);
+    return handle;
+}
+
+// Returns the BamFile this reader was constructed with.
+const BamFile& BamReader::File(void) const
+{
+    assert(d_);
+    const BamFile& file = d_->bamFile_;
+    return file;
+}
+
+// Returns the filename of the underlying BamFile.
+std::string BamReader::Filename(void) const
+{
+    assert(d_);
+    const BamFile& file = d_->bamFile_;
+    return file.Filename();
+}
+
+// Returns the header of the underlying BamFile.
+const BamHeader& BamReader::Header(void) const
+{
+    assert(d_);
+    const BamFile& file = d_->bamFile_;
+    return file.Header();
+}
+
+// Reads the next record from the BAM stream into `record`.
+//
+// \param record  destination; its raw data buffer must already be allocated
+// \returns true if a record was read (tags, header and cached positions of
+//          `record` are refreshed), false if ReadRawData() reported -1
+//          (EOF or end of data range)
+// \throws std::runtime_error for any other negative result; the message
+//         names the suspected cause and the filename
+bool BamReader::GetNext(BamRecord& record)
+{
+    assert(Bgzf());
+    assert(internal::BamRecordMemory::GetRawData(record).get());
+
+    const auto result = ReadRawData(Bgzf(), internal::BamRecordMemory::GetRawData(record).get());
+
+    // success
+    if (result >= 0) {
+        internal::BamRecordMemory::UpdateRecordTags(record);
+        record.header_ = Header();
+        record.ResetCachedPositions();
+
+#if PBBAM_AUTOVALIDATE
+        Validator::Validate(record);
+#endif
+        return true;
+    }
+
+    // EOF or end-of-data range (not an error)
+    if (result == -1)
+        return false;
+
+    // any other code: corrupted file
+    auto errorMsg = string{"corrupted BAM file: "};
+    if (result == -2)
+        errorMsg += "probably truncated";
+    else if (result == -3)
+        errorMsg += "could not read BAM record's core data";
+    else if (result == -4)
+        errorMsg += "could not read BAM record's variable-length data";
+    else
+        errorMsg += "unknown reason " + to_string(result);
+    errorMsg += string{" ("};
+    errorMsg += Filename();
+    errorMsg += string{")"};
+    throw std::runtime_error{errorMsg};
+}
+
+// Reads one raw record from `bgzf` into `b` using htslib's bam_read1() and
+// returns its status code unchanged.
+int BamReader::ReadRawData(BGZF* bgzf, bam1_t* b)
+{
+    const int status = bam_read1(bgzf, b);
+    return status;
+}
+
+// Seeks the BGZF stream to `virtualOffset`.
+// Throws std::runtime_error if bgzf_seek() returns non-zero.
+void BamReader::VirtualSeek(int64_t virtualOffset)
+{
+    const auto status = bgzf_seek(Bgzf(), virtualOffset, SEEK_SET);
+    if (status == 0)
+        return;
+    throw std::runtime_error("Failed to seek in BAM file");
+}
+
+// Returns the current position of the BGZF stream, as given by bgzf_tell().
+int64_t BamReader::VirtualTell(void) const
+{
+    BGZF* const handle = Bgzf();
+    return bgzf_tell(handle);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecord.cpp
+/// \brief Implements the BamRecord class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamRecord.h"
+#include "pbbam/virtual/VirtualRegionTypeMap.h"
+#include "pbbam/ZmwTypeMap.h"
+#include "AssertUtils.h"
+#include "BamRecordTags.h"
+#include "MemoryUtils.h"
+#include "Pulse2BaseCache.h"
+#include "SequenceUtils.h"
+#include <boost/numeric/conversion/cast.hpp>
+#include <htslib/sam.h>
+#include <iostream>
+#include <stdexcept>
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// record type names
+// names matched by NameToType() when mapping text to a RecordType
+static const string recordTypeName_ZMW{"ZMW"};
+static const string recordTypeName_Polymerase{"POLYMERASE"};
+static const string recordTypeName_HqRegion{"HQREGION"};
+static const string recordTypeName_Subread{"SUBREAD"};
+static const string recordTypeName_CCS{"CCS"};
+static const string recordTypeName_Scrap{"SCRAP"};
+static const string recordTypeName_Unknown{"UNKNOWN"};
+
+// Extracts the hole number from a record name: the name must split into
+// exactly three '/'-separated fields, and the middle one is parsed as int.
+// Throws std::runtime_error if the field count is wrong.
+static
+int32_t HoleNumberFromName(const string& fullName)
+{
+    const auto fields = Split(fullName, '/');
+    if (fields.size() == 3)
+        return stoi(fields.at(1));
+    throw std::runtime_error("malformed record name");
+}
+
+// Extracts the query end from a record name: the third '/'-separated field
+// must be "<start>_<end>"; the part after '_' is parsed as int.
+// Throws std::runtime_error if either split yields the wrong field count.
+static
+Position QueryEndFromName(const string& fullName)
+{
+    const auto fields = Split(fullName, '/');
+    if (fields.size() == 3) {
+        const auto interval = Split(fields.at(2), '_');
+        if (interval.size() == 2)
+            return stoi(interval.at(1));
+    }
+    throw std::runtime_error("malformed record name");
+}
+
+// Extracts the query start from a record name: the third '/'-separated field
+// must be "<start>_<end>"; the part before '_' is parsed as int.
+// Throws std::runtime_error if either split yields the wrong field count.
+static
+Position QueryStartFromName(const string& fullName)
+{
+    const auto fields = Split(fullName, '/');
+    if (fields.size() == 3) {
+        const auto interval = Split(fields.at(2), '_');
+        if (interval.size() == 2)
+            return stoi(interval.at(0));
+    }
+    throw std::runtime_error("malformed record name");
+}
+
+// Shorthand for BamRecordTags::LabelFor(tag).
+static inline
+string Label(const BamRecordTag tag)
+{
+    string label = BamRecordTags::LabelFor(tag);
+    return label;
+}
+
+// Stores `value` under `tag` on `impl`: edits the tag if the record already
+// has it, adds it otherwise. Returns `impl` to allow chaining.
+static
+BamRecordImpl* CreateOrEdit(const BamRecordTag tag,
+                            const Tag& value,
+                            BamRecordImpl* impl)
+{
+    const bool alreadyPresent = impl->HasTag(tag);
+    if (!alreadyPresent)
+        impl->AddTag(tag, value);
+    else
+        impl->EditTag(tag, value);
+    return impl;
+}
+
+// Computes (startOffset, endOffset) for a record by scanning the clip
+// operations at both ends of its raw CIGAR.
+//
+// startOffset begins at 0 and grows by the length of each leading soft clip;
+// endOffset begins at seqLength and shrinks by the length of each trailing
+// soft clip. Each scan stops at the first operation that is neither a hard
+// clip nor a soft clip. With no CIGAR operations the result is
+// (0, seqLength).
+//
+// A value of -1 is returned for an offset when a hard clip is met after that
+// offset has already been moved by soft clips (and is not equal to
+// seqLength).
+static
+pair<int32_t, int32_t> AlignedOffsets(const BamRecord& record,
+ const int seqLength)
+{
+ int32_t startOffset = 0;
+ int32_t endOffset = seqLength;
+
+ PBBAM_SHARED_PTR<bam1_t> b = internal::BamRecordMemory::GetRawData(record);
+ uint32_t* cigarData = bam_get_cigar(b.get());
+ const size_t numCigarOps = b->core.n_cigar;
+ if (numCigarOps > 0) {
+
+ // start offset
+ // walk forward from the first CIGAR op
+ for (size_t i = 0; i < numCigarOps; ++i) {
+ const CigarOperationType type = static_cast<CigarOperationType>(bam_cigar_op(cigarData[i]));
+ if (type == CigarOperationType::HARD_CLIP) {
+ // hard clip seen after soft clips already moved the offset: flag with -1
+ if (startOffset != 0 && startOffset != seqLength) {
+ startOffset = -1;
+ break;
+ }
+ }
+ else if (type == CigarOperationType::SOFT_CLIP)
+ startOffset += bam_cigar_oplen(cigarData[i]);
+ else
+ break;
+ }
+
+ // end offset
+ // walk backward from the last CIGAR op (signed index so the loop can reach -1)
+ for (int i = numCigarOps-1; i >= 0; --i) {
+ const CigarOperationType type = static_cast<CigarOperationType>(bam_cigar_op(cigarData[i]));
+ if (type == CigarOperationType::HARD_CLIP) {
+ // hard clip seen after soft clips already moved the offset: flag with -1
+ if (endOffset != 0 && endOffset != seqLength) {
+ endOffset = -1;
+ break;
+ }
+ }
+ else if (type == CigarOperationType::SOFT_CLIP)
+ endOffset -= bam_cigar_oplen(cigarData[i]);
+ else
+ break;
+
+ }
+
+ // an end offset of 0 is replaced by the full sequence length
+ if (endOffset == 0)
+ endOffset = seqLength;
+ }
+ return std::make_pair(startOffset, endOffset);
+}
+
+// Returns a copy of the `len` elements of `input` starting at index `pos`.
+// An empty input yields an empty result; no bounds checking is done for
+// non-empty input.
+template<typename T>
+T Clip(const T& input, const size_t pos, const size_t len)
+{
+    if (input.empty())
+        return T();
+    const auto first = input.cbegin() + pos;
+    const auto last  = first + len;
+    return T{ first, last };
+}
+
+// Clips pulse-indexed data to a range expressed in bases: the result spans
+// from the pulse position found for base `pos` through the one found for
+// base `pos + len - 1`, inclusive. Positions are obtained by stepping through
+// `p2bCache` with FindFirst()/FindNext(). An empty input yields an empty
+// result.
+template<typename T>
+T ClipPulse(const T& input,
+            internal::Pulse2BaseCache* p2bCache,
+            const size_t pos,
+            const size_t len)
+{
+    assert(p2bCache);
+    if (input.empty())
+        return T();
+
+    // advance `pos` steps from the first position to find the start
+    size_t first = p2bCache->FindFirst();
+    for (size_t skipped = 0; skipped < pos; ++skipped)
+        first = p2bCache->FindNext(first);
+
+    // the start already counts as one; advance until `len` are covered
+    size_t last = first;
+    for (size_t covered = 1; covered < len; ++covered)
+        last = p2bCache->FindNext(last);
+
+    // return clipped (last is inclusive)
+    return T{ input.cbegin() + first,
+              input.cbegin() + last + 1 };
+}
+
+// Moves `count` elements starting at `first` into the range beginning at
+// `result`; returns the iterator past the last element written.
+template< class InputIt, class Size, class OutputIt>
+OutputIt Move_N(InputIt first, Size count, OutputIt result)
+{
+    const InputIt last = first + count;
+    return std::move(first, last, result);
+}
+
+// Rewrites *seq in place according to the record's CIGAR.
+//
+// Nothing happens unless the record is mapped and at least one of `aligned`
+// or `exciseSoftClips` is set. Otherwise:
+//  - hard-clip and reference-skip operations contribute nothing;
+//  - soft-clipped positions are dropped when `exciseSoftClips` is set,
+//    copied through otherwise;
+//  - when `aligned` is set, deletions are filled with `deletionNullValue`
+//    and padding with `paddingNullValue`; when it is not set, those
+//    operations fall through to the generic branch below (see NOTE there);
+//  - every other operation moves its elements from the original data.
+//
+// F is the container type, N the element type used for the fill values.
+template <typename F, typename N>
+static void ClipAndGapify(const BamRecordImpl& impl,
+ const bool aligned,
+ const bool exciseSoftClips,
+ F* seq,
+ N paddingNullValue,
+ N deletionNullValue)
+{
+ assert(seq);
+
+ const bool clipOrGapRequested = aligned || exciseSoftClips;
+ if (impl.IsMapped() && clipOrGapRequested)
+ {
+ // determine final container length
+ // (predicate: does an op of this type add to the output length?)
+ auto incrementsOutputLength = [](const CigarOperationType type,
+ const bool aligned,
+ const bool exciseSoftClips)
+ {
+ if (type == CigarOperationType::HARD_CLIP ||
+ type == CigarOperationType::REFERENCE_SKIP)
+ {
+ return false;
+ }
+ else if (type == CigarOperationType::SOFT_CLIP && exciseSoftClips)
+ {
+ return false;
+ }
+ else if (!aligned && (type == CigarOperationType::DELETION ||
+ type == CigarOperationType::PADDING))
+ {
+ return false;
+ }
+ else
+ return true;
+ };
+
+ size_t outputLength = 0;
+ const auto cigar = impl.CigarData();
+ for (const CigarOperation& op : cigar) {
+ if (incrementsOutputLength(op.Type(), aligned, exciseSoftClips))
+ outputLength += op.Length();
+ }
+
+ // move original data to temp, prep output container size
+ F originalSeq = std::move(*seq);
+ seq->resize(outputLength);
+
+ // apply CIGAR ops
+ // srcIndex walks the original data, dstIndex the rebuilt output
+ size_t srcIndex = 0;
+ size_t dstIndex = 0;
+ for (const CigarOperation& op : cigar) {
+ const auto opType = op.Type();
+ const auto opLength = op.Length();
+
+ // nothing to do for hard-clipped & ref-skipped positions
+ if (opType == CigarOperationType::HARD_CLIP ||
+ opType == CigarOperationType::REFERENCE_SKIP)
+ {
+ continue;
+ }
+
+ // maybe skip soft-clipped positions
+ else if (opType == CigarOperationType::SOFT_CLIP) {
+ if (exciseSoftClips)
+ srcIndex += opLength;
+ else {
+ Move_N(originalSeq.begin() + srcIndex,
+ opLength,
+ seq->begin() + dstIndex);
+ srcIndex += opLength;
+ dstIndex += opLength;
+ }
+ }
+
+ // maybe add deletion/padding values
+ else if (aligned && opType == CigarOperationType::DELETION) {
+ for (size_t i = 0; i < opLength; ++i)
+ (*seq)[dstIndex++] = deletionNullValue;
+ }
+ else if (aligned && opType == CigarOperationType::PADDING) {
+ for (size_t i = 0; i < opLength; ++i)
+ (*seq)[dstIndex++] = paddingNullValue;
+ }
+
+ // all other CIGAR ops
+ // NOTE(review): with aligned == false, DELETION/PADDING ops also land
+ // here and advance both indices, although they were not counted in
+ // outputLength above - confirm this is intended
+ else {
+ Move_N(originalSeq.begin() + srcIndex,
+ opLength,
+ seq->begin() + dstIndex);
+ srcIndex += opLength;
+ dstIndex += opLength;
+ }
+ }
+ }
+}
+
+// ClipAndGapify for base strings: padding is shown as '*', deletions as '-'.
+static inline
+void ClipAndGapifyBases(const BamRecordImpl& impl,
+                        const bool aligned,
+                        const bool exciseSoftClips,
+                        string* seq)
+{
+    const char paddingChar  = '*';
+    const char deletionChar = '-';
+    ClipAndGapify<string, char>(impl, aligned, exciseSoftClips,
+                                seq, paddingChar, deletionChar);
+}
+
+// ClipAndGapify for frame data: pulls the raw values out of `frames`,
+// processes them with 0 as the padding/deletion fill, and stores them back.
+static inline
+void ClipAndGapifyFrames(const BamRecordImpl& impl,
+                         const bool aligned,
+                         const bool exciseSoftClips,
+                         Frames* frames)
+{
+    assert(frames);
+    std::vector<uint16_t> rawFrames = std::move(frames->Data());
+    ClipAndGapify<std::vector<uint16_t>, uint16_t>(impl, aligned, exciseSoftClips,
+                                                   &rawFrames, 0, 0);
+    frames->Data(rawFrames);
+}
+
+// ClipAndGapify for float data, with 0.0 as the padding/deletion fill.
+static inline
+void ClipAndGapifyPhotons(const BamRecordImpl& impl,
+                          const bool aligned,
+                          const bool exciseSoftClips,
+                          std::vector<float>* data)
+{
+    const float fill = 0.0f;
+    ClipAndGapify<std::vector<float>, float>(impl, aligned, exciseSoftClips,
+                                             data, fill, fill);
+}
+
+// ClipAndGapify for quality values, with QualityValue(0) as the
+// padding/deletion fill.
+static inline
+void ClipAndGapifyQualities(const BamRecordImpl& impl,
+                            const bool aligned,
+                            const bool exciseSoftClips,
+                            QualityValues* quals)
+{
+    ClipAndGapify<QualityValues, QualityValue>(impl,
+                                               aligned,
+                                               exciseSoftClips,
+                                               quals,
+                                               QualityValue(0),
+                                               QualityValue(0));
+}
+
+// ClipAndGapify for uint32_t data, with 0 as the padding/deletion fill.
+static inline
+void ClipAndGapifyUInts(const BamRecordImpl& impl,
+                        const bool aligned,
+                        const bool exciseSoftClips,
+                        std::vector<uint32_t>* data)
+{
+    const uint32_t fill = 0;
+    ClipAndGapify<std::vector<uint32_t>, uint32_t>(impl, aligned, exciseSoftClips,
+                                                   data, fill, fill);
+}
+
+// Maps a record type name to its RecordType. "ZMW" and "POLYMERASE" both map
+// to RecordType::ZMW; any unrecognized name maps to RecordType::UNKNOWN.
+static
+RecordType NameToType(const string& name)
+{
+    if (name == recordTypeName_Subread)
+        return RecordType::SUBREAD;
+
+    const bool isZmw = (name == recordTypeName_ZMW) ||
+                       (name == recordTypeName_Polymerase);
+    if (isZmw)
+        return RecordType::ZMW;
+    else if (name == recordTypeName_HqRegion)
+        return RecordType::HQREGION;
+    else if (name == recordTypeName_CCS)
+        return RecordType::CCS;
+    else if (name == recordTypeName_Scrap)
+        return RecordType::SCRAP;
+    else
+        return RecordType::UNKNOWN;
+}
+
+// Reverse-complements *bases when the requested orientation differs from the
+// current one and the record is on the reverse strand; otherwise leaves the
+// string untouched. Pulse data uses the case-sensitive variant.
+static
+void OrientBasesAsRequested(string* bases,
+                            Orientation current,
+                            Orientation requested,
+                            bool isReverseStrand,
+                            bool isPulse)
+{
+    assert(bases);
+
+    const bool needsFlip = (current != requested) && isReverseStrand;
+    if (!needsFlip)
+        return;
+
+    if (isPulse)
+        internal::ReverseComplementCaseSens(*bases);
+    else
+        internal::ReverseComplement(*bases);
+}
+
+// Reverses *data in place when the requested orientation differs from the
+// current one and the record is on the reverse strand.
+template<typename Container> inline
+void OrientTagDataAsRequested(Container* data,
+                              Orientation current,
+                              Orientation requested,
+                              bool isReverseStrand)
+{
+    assert(data);
+
+    const bool needsFlip = (current != requested) && isReverseStrand;
+    if (!needsFlip)
+        return;
+    std::reverse(data->begin(), data->end());
+}
+
+// True if bit 0 of htslib's bam_cigar_type() is set for this operation.
+static inline
+bool ConsumesQuery(const CigarOperationType type)
+{
+    const int opFlags = bam_cigar_type(static_cast<int>(type));
+    return (opFlags & 0x1) != 0;
+}
+
+// True if bit 1 of htslib's bam_cigar_type() is set for this operation.
+static inline
+bool ConsumesReference(const CigarOperationType type)
+{
+    const int opFlags = bam_cigar_type(static_cast<int>(type));
+    return (opFlags & 0x2) != 0;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// definition of the class-level photon scaling constant
+const float BamRecord::photonFactor = 10.0f;
+
+// Creates an empty record: cached aligned positions start as
+// UnmappedPosition and p2bCache_ is null.
+BamRecord::BamRecord(void) :
+    alignedStart_(PacBio::BAM::UnmappedPosition),
+    alignedEnd_(PacBio::BAM::UnmappedPosition),
+    p2bCache_(nullptr)
+{
+}
+
+// Creates an empty record associated with `header`: cached aligned positions
+// start as UnmappedPosition and p2bCache_ is null.
+BamRecord::BamRecord(const BamHeader& header) :
+    header_(header),
+    alignedStart_(PacBio::BAM::UnmappedPosition),
+    alignedEnd_(PacBio::BAM::UnmappedPosition),
+    p2bCache_(nullptr)
+{
+}
+
+// Creates a record wrapping a copy of `impl`: cached aligned positions start
+// as UnmappedPosition and p2bCache_ is null.
+BamRecord::BamRecord(const BamRecordImpl& impl) :
+    impl_(impl),
+    alignedStart_(PacBio::BAM::UnmappedPosition),
+    alignedEnd_(PacBio::BAM::UnmappedPosition),
+    p2bCache_(nullptr)
+{
+}
+
+// Creates a record that takes over `impl` by move: cached aligned positions
+// start as UnmappedPosition and p2bCache_ is null.
+BamRecord::BamRecord(BamRecordImpl&& impl) :
+    impl_(std::move(impl)),
+    alignedStart_(PacBio::BAM::UnmappedPosition),
+    alignedEnd_(PacBio::BAM::UnmappedPosition),
+    p2bCache_(nullptr)
+{
+}
+
+// Copy constructor: copies the record data, header and cached aligned
+// positions. The p2bCache_ is not copied; the new record starts without one.
+BamRecord::BamRecord(const BamRecord& other) :
+    impl_(other.impl_),
+    header_(other.header_),
+    alignedStart_(other.alignedStart_),
+    alignedEnd_(other.alignedEnd_),
+    p2bCache_(nullptr) // just reset, for now at least
+{
+}
+
+// Move constructor: takes over every member of `other`, including its
+// p2bCache_.
+BamRecord::BamRecord(BamRecord&& other) :
+    impl_(std::move(other.impl_)),
+    header_(std::move(other.header_)),
+    alignedStart_(std::move(other.alignedStart_)),
+    alignedEnd_(std::move(other.alignedEnd_)),
+    p2bCache_(std::move(other.p2bCache_))
+{
+}
+
+// Copy assignment: copies the record data, header and cached aligned
+// positions from `other`. The p2bCache_ is not copied; this record's cache
+// is dropped instead.
+BamRecord& BamRecord::operator=(const BamRecord& other)
+{
+    impl_   = other.impl_;
+    header_ = other.header_;
+
+    alignedStart_ = other.alignedStart_;
+    alignedEnd_   = other.alignedEnd_;
+
+    p2bCache_.reset(); // just reset, for now at least
+    return *this;
+}
+
+// Move assignment: takes over every member of `other`, including its
+// p2bCache_.
+BamRecord& BamRecord::operator=(BamRecord&& other)
+{
+    impl_   = std::move(other.impl_);
+    header_ = std::move(other.header_);
+
+    alignedStart_ = std::move(other.alignedStart_);
+    alignedEnd_   = std::move(other.alignedEnd_);
+
+    p2bCache_ = std::move(other.p2bCache_);
+    return *this;
+}
+
+// Nothing to do explicitly; members clean up after themselves.
+BamRecord::~BamRecord(void)
+{
+}
+
+// Returns the cached aligned end, computing the aligned positions first if
+// the cached value is still UnmappedPosition.
+Position BamRecord::AlignedEnd(void) const
+{
+    const bool notCachedYet = (alignedEnd_ == PacBio::BAM::UnmappedPosition);
+    if (notCachedYet)
+        CalculateAlignedPositions();
+    return alignedEnd_;
+}
+
+// Returns the cached aligned start, computing the aligned positions first if
+// the cached value is still UnmappedPosition.
+Position BamRecord::AlignedStart(void) const
+{
+    const bool notCachedYet = (alignedStart_ == PacBio::BAM::UnmappedPosition);
+    if (notCachedYet)
+        CalculateAlignedPositions();
+    return alignedStart_;
+}
+
+// Returns Strand::REVERSE if the record is flagged as reverse strand,
+// Strand::FORWARD otherwise.
+Strand BamRecord::AlignedStrand(void) const
+{
+    if (impl_.IsReverseStrand())
+        return Strand::REVERSE;
+    return Strand::FORWARD;
+}
+
+// Fetches the ALT_LABEL_QV tag as quality values, forwarding the orientation,
+// alignment, soft-clip and pulse options to FetchQualities().
+QualityValues BamRecord::AltLabelQV(Orientation orientation,
+                                    bool aligned,
+                                    bool exciseSoftClips,
+                                    PulseBehavior pulseBehavior) const
+{
+    const BamRecordTag tag = BamRecordTag::ALT_LABEL_QV;
+    return FetchQualities(tag, orientation, aligned, exciseSoftClips, pulseBehavior);
+}
+
+// Stores `altLabelQVs` (as their Fastq() text) in the ALT_LABEL_QV tag,
+// creating the tag if needed.
+BamRecord& BamRecord::AltLabelQV(const QualityValues& altLabelQVs)
+{
+    const BamRecordTag tag = BamRecordTag::ALT_LABEL_QV;
+    internal::CreateOrEdit(tag, altLabelQVs.Fastq(), &impl_);
+    return *this;
+}
+
+// Fetches the ALT_LABEL_TAG tag as a base string, forwarding the orientation,
+// alignment, soft-clip and pulse options to FetchBases().
+string BamRecord::AltLabelTag(Orientation orientation,
+                              bool aligned,
+                              bool exciseSoftClips,
+                              PulseBehavior pulseBehavior) const
+{
+    const BamRecordTag tag = BamRecordTag::ALT_LABEL_TAG;
+    return FetchBases(tag, orientation, aligned, exciseSoftClips, pulseBehavior);
+}
+
+// Stores `tags` in the ALT_LABEL_TAG tag, creating the tag if needed.
+BamRecord& BamRecord::AltLabelTag(const string& tags)
+{
+    const BamRecordTag tag = BamRecordTag::ALT_LABEL_TAG;
+    internal::CreateOrEdit(tag, tags, &impl_);
+    return *this;
+}
+
+// Returns the first element of the Barcodes() pair.
+int16_t BamRecord::BarcodeForward(void) const
+{
+    const auto barcodes = Barcodes();
+    return barcodes.first;
+}
+
+// Returns the second element of the Barcodes() pair.
+int16_t BamRecord::BarcodeReverse(void) const
+{
+    const auto barcodes = Barcodes();
+    return barcodes.second;
+}
+
+// Returns the BARCODE_QUALITY tag value, or 0 when the tag is absent.
+uint8_t BamRecord::BarcodeQuality(void) const
+{
+    const auto label = internal::BamRecordTags::LabelFor(BamRecordTag::BARCODE_QUALITY);
+    const auto quality = impl_.TagValue(label);
+    if (!quality.IsNull())
+        return quality.ToUInt8();
+    return 0; // ?? "missing" value for tags ?? should we consider boost::optional<T> for these kind of guys ??
+}
+
+// Stores 'quality' under the BARCODE_QUALITY tag, creating or editing the tag
+// as needed. Returns *this for chaining.
+BamRecord& BamRecord::BarcodeQuality(const uint8_t quality)
+{
+    internal::CreateOrEdit(BamRecordTag::BARCODE_QUALITY, quality, &impl_);
+    return *this;
+}
+
+// Returns the (first, second) barcode ids stored in the BARCODES tag.
+//
+// Throws std::runtime_error if the tag is missing, is not a uint16_t array,
+// or does not hold exactly two values. boost::numeric_cast guards the
+// uint16_t -> int16_t narrowing.
+std::pair<int16_t,int16_t> BamRecord::Barcodes(void) const
+{
+    const auto label = internal::BamRecordTags::LabelFor(BamRecordTag::BARCODES);
+    const Tag& barcodeTag = impl_.TagValue(label);
+    if (barcodeTag.IsNull())
+        throw std::runtime_error("barcode tag (bc) was requested but is missing");
+
+    // NOTE: the spec stores barcodes as uint16, although the API exposes
+    //       them as int16_t (bug 31511)
+    if (!barcodeTag.IsUInt16Array())
+        throw std::runtime_error("barcode tag (bc) is malformed: should be a uint16_t array of size==2.");
+
+    const auto values = barcodeTag.ToUInt16Array();
+    if (values.size() != 2)
+        throw std::runtime_error("barcode tag (bc) is malformed: should be a uint16_t array of size==2.");
+
+    return std::make_pair(boost::numeric_cast<int16_t>(values[0]),
+                          boost::numeric_cast<int16_t>(values[1]));
+}
+
+// Stores the two barcode ids under the BARCODES tag as a uint16_t array
+// (boost::numeric_cast guards the int16_t -> uint16_t conversion).
+// Returns *this for chaining.
+BamRecord& BamRecord::Barcodes(const std::pair<int16_t,int16_t>& barcodeIds)
+{
+    const uint16_t first  = boost::numeric_cast<uint16_t>(barcodeIds.first);
+    const uint16_t second = boost::numeric_cast<uint16_t>(barcodeIds.second);
+    const vector<uint16_t> data = { first, second };
+
+    internal::CreateOrEdit(BamRecordTag::BARCODES, data, &impl_);
+    return *this;
+}
+
+// Recomputes the cached aligned start/end. The cache is reset first and left
+// in that state when the record is unmapped, when the query start/end is
+// UnmappedPosition, or when internal::AlignedOffsets() reports -1.
+void BamRecord::CalculateAlignedPositions(void) const
+{
+    // always start from a clean cache
+    ResetCachedPositions();
+
+    // nothing to compute for unmapped records
+    if (!impl_.IsMapped())
+        return;
+
+    // query start/end: CCS records span [0, seqLength), others use their tags
+    const size_t seqLength = impl_.SequenceLength();
+    const bool isCcs = (Type() == RecordType::CCS);
+    const Position qStart = isCcs ? Position(0) : QueryStart();
+    const Position qEnd   = isCcs ? Position(seqLength) : QueryEnd();
+    if (qStart == PacBio::BAM::UnmappedPosition || qEnd == PacBio::BAM::UnmappedPosition)
+        return;
+
+    // offsets of the aligned region within the sequence
+    const std::pair<int32_t, int32_t> alignedOffsets = internal::AlignedOffsets(*this, seqLength);
+    const int32_t startOffset = alignedOffsets.first;
+    const int32_t endOffset   = alignedOffsets.second;
+    if (startOffset == -1 || endOffset == -1)
+        return; // TODO: handle error more??
+
+    // store aligned positions (polymerase read coordinates)
+    if (!impl_.IsReverseStrand()) {
+        alignedStart_ = qStart + startOffset;
+        alignedEnd_ = qEnd - (seqLength - endOffset);
+    }
+    else {
+        alignedStart_ = qStart + (seqLength - endOffset);
+        alignedEnd_ = qEnd - startOffset;
+    }
+}
+
+// Builds the pulse-to-base cache from the record's pulse calls, unless it
+// already exists. Throws std::runtime_error when the pulse-call tag is absent.
+void BamRecord::CalculatePulse2BaseCache(void) const
+{
+    // already built - nothing to do
+    if (p2bCache_)
+        return;
+
+    // the cache is derived from the pulse calls, so they must be present
+    if (!HasPulseCall())
+        throw std::runtime_error("BamRecord cannot calculate pulse2base mapping without 'pc' tag.");
+
+    // all pulses, native orientation, no alignment/clipping applied
+    const bool aligned = false;
+    const bool exciseSoftClips = false;
+    const auto pulseCalls = FetchBases(BamRecordTag::PULSE_CALL,
+                                       Orientation::NATIVE,
+                                       aligned,
+                                       exciseSoftClips,
+                                       PulseBehavior::ALL);
+    p2bCache_.reset(new internal::Pulse2BaseCache{ pulseCalls });
+}
+
+// Returns a copy of the record's CIGAR. When 'exciseAllClips' is set, all
+// soft-clip and hard-clip operations are removed from the copy.
+Cigar BamRecord::CigarData(bool exciseAllClips) const
+{
+    auto cigar = impl_.CigarData();
+    if (!exciseAllClips)
+        return cigar;
+
+    // erase-remove every clipping operation
+    const auto firstRemoved = std::remove_if(
+        cigar.begin(), cigar.end(),
+        [](const CigarOperation& op) -> bool
+        {
+            switch (op.Type()) {
+                case CigarOperationType::SOFT_CLIP : // fall through
+                case CigarOperationType::HARD_CLIP : return true;
+                default                            : return false;
+            }
+        });
+    cigar.erase(firstRemoved, cigar.end());
+    return cigar;
+}
+
+// Dispatches to the clipping routine selected by 'clipType'.
+// CLIP_NONE leaves the record untouched; any unrecognized value throws
+// std::runtime_error.
+BamRecord& BamRecord::Clip(const ClipType clipType,
+                           const Position start,
+                           const Position end)
+{
+    if (clipType == ClipType::CLIP_NONE)
+        return *this;
+    if (clipType == ClipType::CLIP_TO_QUERY)
+        return ClipToQuery(start, end);
+    if (clipType == ClipType::CLIP_TO_REFERENCE)
+        return ClipToReference(start, end);
+
+    throw std::runtime_error("unsupported clip type requested");
+}
+
+// Clips SEQ, QUAL and every per-base / per-pulse tag present on this record.
+//
+// clipFrom   - start offset handed to internal::Clip / internal::ClipPulse
+// clipLength - length handed to internal::Clip / internal::ClipPulse
+//
+// All data are fetched and clipped in native orientation. SEQ/QUAL are flipped
+// back for reverse-strand records before being stored. Per-pulse tags are only
+// processed when the pulse-call tag is present; they are clipped through the
+// pulse-to-base cache. That cache describes the *unclipped* pulse calls, so it
+// is dropped once the clipped tags have been written (it is rebuilt on demand
+// by CalculatePulse2BaseCache()).
+void BamRecord::ClipFields(const size_t clipFrom,
+                           const size_t clipLength)
+{
+    const bool isForwardStrand = (AlignedStrand() == Strand::FORWARD);
+
+    // clip seq, quals
+    string sequence = internal::Clip(Sequence(Orientation::NATIVE), clipFrom, clipLength);
+    QualityValues qualities = internal::Clip(Qualities(Orientation::NATIVE), clipFrom, clipLength);
+    if (!isForwardStrand) {
+        internal::ReverseComplement(sequence);
+        internal::Reverse(qualities);
+    }
+    impl_.SetSequenceAndQualities(sequence, qualities.Fastq());
+
+    // update BAM tags (work on a copy, written back in one go below)
+    TagCollection tags = impl_.Tags();
+    if (HasDeletionQV())
+        tags[internal::Label(BamRecordTag::DELETION_QV)] = internal::Clip(DeletionQV(Orientation::NATIVE), clipFrom, clipLength).Fastq();
+    if (HasInsertionQV())
+        tags[internal::Label(BamRecordTag::INSERTION_QV)] = internal::Clip(InsertionQV(Orientation::NATIVE), clipFrom, clipLength).Fastq();
+    if (HasMergeQV())
+        tags[internal::Label(BamRecordTag::MERGE_QV)] = internal::Clip(MergeQV(Orientation::NATIVE), clipFrom, clipLength).Fastq();
+    if (HasSubstitutionQV())
+        tags[internal::Label(BamRecordTag::SUBSTITUTION_QV)] = internal::Clip(SubstitutionQV(Orientation::NATIVE), clipFrom, clipLength).Fastq();
+    if (HasIPD())
+        tags[internal::Label(BamRecordTag::IPD)] = internal::Clip(IPD(Orientation::NATIVE).Data(), clipFrom, clipLength);
+    if (HasPulseWidth())
+        tags[internal::Label(BamRecordTag::PULSE_WIDTH)] = internal::Clip(PulseWidth(Orientation::NATIVE).Data(), clipFrom, clipLength);
+    if (HasDeletionTag())
+        tags[internal::Label(BamRecordTag::DELETION_TAG)] = internal::Clip(DeletionTag(Orientation::NATIVE), clipFrom, clipLength);
+    if (HasSubstitutionTag())
+        tags[internal::Label(BamRecordTag::SUBSTITUTION_TAG)] = internal::Clip(SubstitutionTag(Orientation::NATIVE), clipFrom, clipLength);
+
+    // internal BAM tags
+    const bool hasPulseData = HasPulseCall();
+    if (hasPulseData) {
+
+        // ensure p2bCache initialized
+        CalculatePulse2BaseCache();
+        internal::Pulse2BaseCache* p2bCache = p2bCache_.get();
+
+        if (HasAltLabelQV())
+            tags[internal::Label(BamRecordTag::ALT_LABEL_QV)] = internal::ClipPulse(AltLabelQV(Orientation::NATIVE), p2bCache, clipFrom, clipLength).Fastq();
+        if (HasLabelQV())
+            tags[internal::Label(BamRecordTag::LABEL_QV)] = internal::ClipPulse(LabelQV(Orientation::NATIVE), p2bCache, clipFrom, clipLength).Fastq();
+        if (HasPulseMergeQV())
+            tags[internal::Label(BamRecordTag::PULSE_MERGE_QV)] = internal::ClipPulse(PulseMergeQV(Orientation::NATIVE), p2bCache, clipFrom, clipLength).Fastq();
+        if (HasAltLabelTag())
+            tags[internal::Label(BamRecordTag::ALT_LABEL_TAG)] = internal::ClipPulse(AltLabelTag(Orientation::NATIVE), p2bCache, clipFrom, clipLength);
+
+        // pulse calls are known to be present inside this branch
+        tags[internal::Label(BamRecordTag::PULSE_CALL)] = internal::ClipPulse(PulseCall(Orientation::NATIVE), p2bCache, clipFrom, clipLength);
+
+        if (HasPkmean())
+            tags[internal::Label(BamRecordTag::PKMEAN)] = EncodePhotons(internal::ClipPulse(Pkmean(Orientation::NATIVE), p2bCache, clipFrom, clipLength));
+        if (HasPkmid())
+            tags[internal::Label(BamRecordTag::PKMID)] = EncodePhotons(internal::ClipPulse(Pkmid(Orientation::NATIVE), p2bCache, clipFrom, clipLength));
+        if (HasPkmean2())
+            tags[internal::Label(BamRecordTag::PKMEAN_2)] = EncodePhotons(internal::ClipPulse(Pkmean2(Orientation::NATIVE), p2bCache, clipFrom, clipLength));
+        if (HasPkmid2())
+            tags[internal::Label(BamRecordTag::PKMID_2)] = EncodePhotons(internal::ClipPulse(Pkmid2(Orientation::NATIVE), p2bCache, clipFrom, clipLength));
+        if (HasPrePulseFrames())
+            tags[internal::Label(BamRecordTag::PRE_PULSE_FRAMES)] = internal::ClipPulse(PrePulseFrames(Orientation::NATIVE).Data(), p2bCache, clipFrom, clipLength);
+        if (HasPulseCallWidth())
+            tags[internal::Label(BamRecordTag::PULSE_CALL_WIDTH)] = internal::ClipPulse(PulseCallWidth(Orientation::NATIVE).Data(), p2bCache, clipFrom, clipLength);
+        if (HasStartFrame())
+            tags[internal::Label(BamRecordTag::START_FRAME)] = internal::ClipPulse(StartFrame(Orientation::NATIVE), p2bCache, clipFrom, clipLength);
+    }
+
+    impl_.Tags(tags);
+
+    // the cache was built from the pre-clip pulse calls and no longer matches
+    // the tags just written - drop it so the next user recomputes it
+    if (hasPulseData)
+        p2bCache_.reset(nullptr);
+}
+
+BamRecord& BamRecord::ClipToQuery(const Position start,
+ const Position end)
+{ /* Clips this record to the query interval given by 'start' and 'end'
+   * (the retained length is end - start). For mapped records the CIGAR is
+   * trimmed and the reference position shifted accordingly; SEQ, QUAL and
+   * tags are clipped via ClipFields(); the QUERY_START / QUERY_END tags are
+   * rewritten and the cached aligned positions reset. Returns *this.
+   *
+   * NOTE(review): 'start'/'end' are not clamped to the original query range.
+   * A request that overlaps it only partially (e.g. start < original start
+   * while end < original end) makes the size_t offsets below wrap around -
+   * confirm callers always pass a sub-range.
+   * NOTE(review): leading CIGAR ops are matched against the start of the
+   * query regardless of strand - confirm this is intended for reverse-strand
+   * records.
+   */
+ // cache original coords, skip out if clip not needed
+ const size_t seqLength = impl_.SequenceLength();
+ const RecordType type = Type();
+ const Position origQStart = (type == RecordType::CCS) ? Position(0) : QueryStart(); // CCS records use [0, seqLength)
+ const Position origQEnd = (type == RecordType::CCS) ? Position(seqLength) : QueryEnd();
+ if (start <= origQStart && end >= origQEnd) // requested range already covers the whole query
+ return *this;
+
+ // determine new offsets into data
+ const size_t startOffset = start - origQStart; // query positions dropped at the front
+ const size_t endOffset = origQEnd - end; // query positions dropped at the back
+
+ // maybe update CIGAR & aligned position
+ if (IsMapped()) {
+
+ // fetch a 'working copy' of CIGAR data
+ Cigar cigar = impl_.CigarData();
+
+ // clip leading CIGAR ops
+ size_t referencePositionOffset = 0; // reference positions consumed by the removed leading ops
+ size_t remaining = startOffset; // query positions still to be removed
+ while (remaining > 0 && !cigar.empty()) {
+ CigarOperation& firstOp = cigar.front();
+ const size_t firstOpLength = firstOp.Length();
+ const bool consumesQuery = internal::ConsumesQuery(firstOp.Type());
+ const bool consumesRef = internal::ConsumesReference(firstOp.Type());
+
+ // if (!consumesQuery)
+ // just pop (e.g. deletion) ?
+ // else {
+ // check bounds, like clip to reference ?
+ // }
+
+ // CIGAR op ends at or before clip
+ if (firstOpLength <= remaining) {
+ cigar.erase(cigar.begin());
+ if (consumesQuery)
+ remaining -= firstOpLength;
+ if (consumesRef)
+ referencePositionOffset += firstOpLength;
+ }
+
+ // CIGAR op straddles clip
+ // NOTE(review): an op that consumes no query but is longer than 'remaining' is also shortened here - confirm (see sketch above)
+ else {
+ firstOp.Length(firstOpLength - remaining);
+ if (consumesRef)
+ referencePositionOffset += remaining;
+ remaining = 0;
+ }
+ }
+
+ // clip trailing CIGAR ops
+ remaining = endOffset;
+ while (remaining > 0 && !cigar.empty()) {
+ CigarOperation& lastOp = cigar.back();
+ const size_t lastOpLength = lastOp.Length();
+ const bool consumesQuery = internal::ConsumesQuery(lastOp.Type());
+
+ // whole CIGAR op falls inside the region being clipped off
+ if (lastOpLength <= remaining) {
+ cigar.pop_back();
+ if (consumesQuery)
+ remaining -= lastOpLength;
+ }
+
+ // CIGAR op straddles clip
+ else {
+ lastOp.Length(lastOpLength - remaining);
+ remaining = 0;
+ }
+ }
+
+ // update CIGAR & position
+ impl_.CigarData(cigar);
+ const Position origPosition = impl_.Position();
+ impl_.Position(origPosition + referencePositionOffset);
+ }
+
+ // clip SEQ, QUAL, & tags
+ const size_t clipFrom = startOffset;
+ const size_t clipLength = (end - start);
+ ClipFields(clipFrom, clipLength);
+
+ // update query start/end
+ // TODO: update name to reflect new QS/QE ???
+ internal::CreateOrEdit(BamRecordTag::QUERY_START, start, &impl_);
+ internal::CreateOrEdit(BamRecordTag::QUERY_END, end, &impl_);
+// UpdateName();
+
+ // reset any cached aligned start/end
+ ResetCachedPositions();
+ return *this;
+}
+
+// Clips the record to the given reference interval by delegating to the
+// strand-specific worker. Unmapped records are returned unchanged.
+BamRecord& BamRecord::ClipToReference(const Position start,
+                                      const Position end)
+{
+    // clipping to reference makes no sense for unmapped records
+    // (or should we even consider throwing here?)
+    if (!IsMapped())
+        return *this;
+
+    if (AlignedStrand() == Strand::FORWARD)
+        return ClipToReferenceForward(start, end);
+    return ClipToReferenceReverse(start, end);
+}
+
+BamRecord& BamRecord::ClipToReferenceForward(const PacBio::BAM::Position start,
+ const PacBio::BAM::Position end)
+{ /* Forward-strand worker for ClipToReference(): trims the CIGAR so the
+   * alignment lies within the reference interval bounded by 'start' and
+   * 'end', moves the record position to the new reference start, clips
+   * SEQ/QUAL/tags via ClipFields(), rewrites QUERY_START / QUERY_END and
+   * resets the cached aligned positions. Returns *this.
+   * Preconditions (asserted): the record is mapped, on the forward strand.
+   *
+   * NOTE(review): a requested interval that does not overlap the alignment
+   * at all is not handled specially - confirm callers rule that out.
+   */
+ assert(IsMapped());
+ assert(AlignedStrand() == Strand::FORWARD);
+
+ // cache original coords
+ const size_t seqLength = impl_.SequenceLength();
+ const RecordType type = Type();
+ const Position origQStart = (type == RecordType::CCS) ? Position(0) : QueryStart(); // CCS records use [0, seqLength)
+ const Position origQEnd = (type == RecordType::CCS) ? Position(seqLength) : QueryEnd();
+ const Position origTStart = ReferenceStart();
+ const Position origTEnd = ReferenceEnd();
+ assert(AlignedStart() >= origQStart);
+ assert(AlignedEnd() <= origQEnd);
+
+ // skip if already within requested clip range
+ if (start <= origTStart && end >= origTEnd)
+ return *this;
+
+ const Position newTStart = std::max(origTStart, start); // never extends beyond the original alignment
+ const Position newTEnd = std::min(origTEnd, end);
+
+ // fetch a 'working copy' of CIGAR data
+ Cigar cigar = impl_.CigarData();
+
+ // we're going to skip query sequence outside aligned region
+ size_t queryPosRemovedFront = 0; // query positions dropped by removed leading ops
+ size_t queryPosRemovedBack = 0; // query positions dropped by removed trailing ops
+
+ // ------------------------
+ // clip leading CIGAR ops
+ // ------------------------
+
+ size_t remaining = newTStart - origTStart; // reference positions still to be removed at the front
+ while (remaining > 0 && !cigar.empty()) {
+ CigarOperation& firstOp = cigar.front();
+ const size_t firstOpLength = firstOp.Length();
+ const bool consumesQuery = internal::ConsumesQuery(firstOp.Type());
+ const bool consumesRef = internal::ConsumesReference(firstOp.Type());
+
+ if (!consumesRef) {
+
+ // e.g. softclip - just pop it completely
+ cigar.erase(cigar.begin());
+ if (consumesQuery)
+ queryPosRemovedFront += firstOpLength;
+
+ } else {
+ assert(consumesRef);
+
+ // CIGAR ends at or before clip
+ if (firstOpLength <= remaining) {
+ cigar.erase(cigar.begin());
+ if (consumesQuery)
+ queryPosRemovedFront += firstOpLength;
+ if (consumesRef) // always true in this branch
+ remaining -= firstOpLength;
+ }
+
+ // CIGAR straddles clip
+ else {
+ assert(firstOpLength > remaining);
+ firstOp.Length(firstOpLength - remaining);
+ if (consumesQuery)
+ queryPosRemovedFront += remaining;
+ remaining = 0;
+ }
+ }
+ }
+
+ // -------------------------
+ // clip trailing CIGAR ops
+ // -------------------------
+
+ remaining = origTEnd - newTEnd; // reference positions still to be removed at the back
+ while (remaining > 0 && !cigar.empty()) {
+ CigarOperation& lastOp = cigar.back();
+ const size_t lastOpLength = lastOp.Length();
+ const bool consumesQuery = internal::ConsumesQuery(lastOp.Type());
+ const bool consumesRef = internal::ConsumesReference(lastOp.Type());
+
+ if (!consumesRef) {
+
+ // e.g. softclip - just pop it completely
+ cigar.pop_back();
+ if (consumesQuery)
+ queryPosRemovedBack += lastOpLength;
+
+ } else {
+ assert(consumesRef);
+
+ // whole CIGAR op falls inside the region being clipped off
+ if (lastOpLength <= remaining) {
+ cigar.pop_back();
+ if (consumesQuery)
+ queryPosRemovedBack += lastOpLength;
+ if (consumesRef) // always true in this branch
+ remaining -= lastOpLength;
+ }
+
+ // CIGAR straddles clip
+ else {
+ assert(lastOpLength > remaining);
+ lastOp.Length(lastOpLength - remaining);
+ if (consumesQuery)
+ queryPosRemovedBack += remaining;
+ remaining = 0;
+ }
+ }
+ }
+
+ // update CIGAR and position
+ impl_.CigarData(cigar);
+ impl_.Position(newTStart);
+
+ // clip SEQ, QUAL, tags
+ const Position qStart = origQStart + queryPosRemovedFront;
+ const Position qEnd = origQEnd - queryPosRemovedBack;
+ const size_t clipFrom = queryPosRemovedFront;
+ const size_t clipLength = qEnd - qStart;
+ ClipFields(clipFrom, clipLength);
+
+ // update query start/end
+ internal::CreateOrEdit(BamRecordTag::QUERY_START, qStart, &impl_);
+ internal::CreateOrEdit(BamRecordTag::QUERY_END, qEnd, &impl_);
+// UpdateName();
+
+ // reset any cached aligned start/end
+ ResetCachedPositions();
+ return *this;
+}
+
+BamRecord& BamRecord::ClipToReferenceReverse(const PacBio::BAM::Position start,
+ const PacBio::BAM::Position end)
+{ /* Reverse-strand worker for ClipToReference(): same procedure as the
+   * forward-strand variant, except that query positions consumed by
+   * removed *leading* CIGAR ops are counted against the back of the query
+   * and those of removed *trailing* ops against the front. Returns *this.
+   * Preconditions (asserted): the record is mapped, on the reverse strand.
+   *
+   * NOTE(review): a requested interval that does not overlap the alignment
+   * at all is not handled specially - confirm callers rule that out.
+   */
+ assert(IsMapped());
+ assert(AlignedStrand() == Strand::REVERSE);
+
+ // cache original coords
+ const size_t seqLength = impl_.SequenceLength();
+ const RecordType type = Type();
+ const Position origQStart = (type == RecordType::CCS) ? Position(0) : QueryStart(); // CCS records use [0, seqLength)
+ const Position origQEnd = (type == RecordType::CCS) ? Position(seqLength) : QueryEnd();
+ const Position origTStart = ReferenceStart();
+ const Position origTEnd = ReferenceEnd();
+
+ // skip if already within requested clip range
+ if (start <= origTStart && end >= origTEnd)
+ return *this;
+ assert(AlignedStart() >= origQStart);
+ assert(AlignedEnd() <= origQEnd);
+
+ const Position newTStart = std::max(origTStart, start); // never extends beyond the original alignment
+ const Position newTEnd = std::min(origTEnd, end);
+
+ Cigar cigar = impl_.CigarData(); // working copy, written back below
+
+ size_t queryPosRemovedFront = 0; // query positions dropped by removed trailing ops
+ size_t queryPosRemovedBack = 0; // query positions dropped by removed leading ops
+
+ // update CIGAR - clip front ops, then clip back ops
+ size_t remaining = newTStart - origTStart; // reference positions still to be removed at the front
+ while (remaining > 0 && !cigar.empty()) {
+ CigarOperation& firstOp = cigar.front();
+ const CigarOperationType firstOpType = firstOp.Type();
+ const size_t firstOpLength = firstOp.Length();
+ const bool consumesQuery = internal::ConsumesQuery(firstOpType);
+ const bool consumesRef = internal::ConsumesReference(firstOpType);
+
+ if (!consumesRef) {
+
+ // e.g. softclip - just pop it completely
+ cigar.erase(cigar.begin());
+ if (consumesQuery)
+ queryPosRemovedBack += firstOpLength;
+
+ } else {
+ assert(consumesRef);
+
+ // CIGAR ends at or before clip
+ if (firstOpLength <= remaining) {
+ cigar.erase(cigar.begin());
+ if (consumesQuery)
+ queryPosRemovedBack += firstOpLength;
+ if (consumesRef) // always true in this branch
+ remaining -= firstOpLength;
+ }
+
+ // CIGAR straddles clip
+ else {
+ assert(firstOpLength > remaining);
+ firstOp.Length(firstOpLength - remaining);
+ if (consumesQuery)
+ queryPosRemovedBack += remaining;
+ remaining = 0;
+ }
+ }
+ }
+
+ remaining = origTEnd - newTEnd; // reference positions still to be removed at the back
+ while (remaining > 0 && !cigar.empty()) {
+ CigarOperation& lastOp = cigar.back();
+ const CigarOperationType lastOpType = lastOp.Type();
+ const size_t lastOpLength = lastOp.Length();
+ const bool consumesQuery = internal::ConsumesQuery(lastOpType);
+ const bool consumesRef = internal::ConsumesReference(lastOpType);
+
+ if (!consumesRef) {
+
+ // e.g. softclip - just pop it completely
+ cigar.pop_back();
+ if (consumesQuery)
+ queryPosRemovedFront += lastOpLength;
+
+ } else {
+ assert(consumesRef);
+
+ // whole CIGAR op falls inside the region being clipped off
+ if (lastOpLength <= remaining) {
+ cigar.pop_back();
+ if (consumesQuery)
+ queryPosRemovedFront += lastOpLength;
+ if (consumesRef) // always true in this branch
+ remaining -= lastOpLength;
+ }
+
+ // CIGAR straddles clip
+ else {
+ assert(lastOpLength > remaining);
+ lastOp.Length(lastOpLength - remaining);
+ if (consumesQuery)
+ queryPosRemovedFront += remaining;
+ remaining = 0;
+ }
+ }
+ }
+ impl_.CigarData(cigar);
+
+ // update aligned reference position
+ impl_.Position(newTStart);
+
+ // clip SEQ, QUAL, tags
+ const Position qStart = origQStart + queryPosRemovedFront;
+ const Position qEnd = origQEnd - queryPosRemovedBack;
+ const size_t clipFrom = queryPosRemovedFront;
+ const size_t clipLength = qEnd - qStart;
+ ClipFields(clipFrom, clipLength);
+
+ // update query start/end
+ internal::CreateOrEdit(BamRecordTag::QUERY_START, qStart, &impl_);
+ internal::CreateOrEdit(BamRecordTag::QUERY_END, qEnd, &impl_);
+// UpdateName();
+
+ // reset any cached aligned start/end
+ ResetCachedPositions();
+ return *this;
+}
+
+// Fetches the DELETION_QV tag as quality values, forwarding the presentation
+// options to FetchQualities().
+QualityValues BamRecord::DeletionQV(Orientation orientation,
+                                    bool aligned,
+                                    bool exciseSoftClips) const
+{
+    const BamRecordTag tag = BamRecordTag::DELETION_QV;
+    return FetchQualities(tag, orientation, aligned, exciseSoftClips);
+}
+
+// Stores 'deletionQVs' (FASTQ-encoded) under the DELETION_QV tag, creating or
+// editing the tag as needed. Returns *this for chaining.
+BamRecord& BamRecord::DeletionQV(const QualityValues& deletionQVs)
+{
+    const BamRecordTag tag = BamRecordTag::DELETION_QV;
+    internal::CreateOrEdit(tag, deletionQVs.Fastq(), &impl_);
+    return *this;
+}
+
+
+// Fetches the DELETION_TAG tag as a base string, forwarding the presentation
+// options to FetchBases().
+string BamRecord::DeletionTag(Orientation orientation,
+                              bool aligned,
+                              bool exciseSoftClips) const
+{
+    const BamRecordTag tag = BamRecordTag::DELETION_TAG;
+    return FetchBases(tag, orientation, aligned, exciseSoftClips);
+}
+
+// Stores 'tags' under the DELETION_TAG tag, creating or editing the tag as
+// needed. Returns *this for chaining.
+BamRecord& BamRecord::DeletionTag(const string& tags)
+{
+    const BamRecordTag tag = BamRecordTag::DELETION_TAG;
+    internal::CreateOrEdit(tag, tags, &impl_);
+    return *this;
+}
+
+// Converts photon values to their stored uint16_t form by scaling each one
+// with photonFactor (the inverse of the division done in FetchPhotonsRaw()).
+vector<uint16_t>
+BamRecord::EncodePhotons(const vector<float>& data)
+{
+    const size_t numValues = data.size();
+
+    vector<uint16_t> encoded;
+    encoded.reserve(numValues);
+    for (size_t i = 0; i < numValues; ++i)
+        encoded.emplace_back(data[i] * photonFactor);
+    return encoded;
+}
+
+// Returns the value of 'tag' as a string, exactly as stored on the record.
+string BamRecord::FetchBasesRaw(const BamRecordTag tag) const
+{
+    return impl_.TagValue(tag).ToString();
+}
+
+// Fetches base-like string data for 'tag' (SEQ itself, or a tag) and returns
+// it in the requested orientation.
+//
+// Pulse tags requested with PulseBehavior::BASECALLS_ONLY have their squashed
+// pulses removed first. When 'aligned' and/or 'exciseSoftClips' is set, the
+// data are brought into genomic orientation and clipped/gapped against the
+// CIGAR; this throws std::runtime_error for pulse tags unless BASECALLS_ONLY
+// was requested.
+string BamRecord::FetchBases(const BamRecordTag tag,
+                             const Orientation orientation,
+                             const bool aligned,
+                             const bool exciseSoftClips,
+                             const PulseBehavior pulseBehavior) const
+{
+    const bool isPulse       = internal::BamRecordTags::IsPulse(tag);
+    const bool basecallsOnly = (pulseBehavior == PulseBehavior::BASECALLS_ONLY);
+    const bool usesCigar     = (aligned || exciseSoftClips);
+
+    // fetch raw: SEQ is stored in genomic orientation, all tags in native
+    string bases;
+    Orientation current = Orientation::NATIVE;
+    if (tag == BamRecordTag::SEQ) {
+        bases = impl_.Sequence();
+        current = Orientation::GENOMIC;
+    } else {
+        bases = FetchBasesRaw(tag);
+    }
+
+    // maybe strip 'squashed' pulse loci
+    if (isPulse && basecallsOnly) {
+        CalculatePulse2BaseCache();
+        bases = p2bCache_->RemoveSquashedPulses(bases);
+    }
+
+    if (usesCigar) {
+
+        if (isPulse && !basecallsOnly)
+            throw std::runtime_error("Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
+                                     "Use PulseBehavior::BASECALLS_ONLY instead.");
+
+        // CIGAR operations apply to genomic orientation
+        internal::OrientBasesAsRequested(&bases, current, Orientation::GENOMIC,
+                                         impl_.IsReverseStrand(), isPulse);
+        current = Orientation::GENOMIC;
+
+        internal::ClipAndGapifyBases(impl_, aligned, exciseSoftClips, &bases);
+    }
+
+    // hand back in the orientation the caller asked for
+    internal::OrientBasesAsRequested(&bases, current, orientation,
+                                     impl_.IsReverseStrand(), isPulse);
+    return bases;
+}
+
+// Returns the frame data stored under 'tag'. A null tag yields an empty
+// Frames object; uint8_t arrays are treated as lossy frame codes and decoded;
+// anything else is expected (asserted) to be a lossless uint16_t array.
+Frames BamRecord::FetchFramesRaw(const BamRecordTag tag) const
+{
+    const Tag& frameTag = impl_.TagValue(tag);
+
+    Frames frames;
+    if (frameTag.IsNull())
+        return frames; // throw ?
+
+    const bool isLossy = frameTag.IsUInt8Array();
+    if (isLossy) {
+        // lossy frame codes
+        const vector<uint8_t> codes = frameTag.ToUInt8Array();
+        frames = Frames::Decode(codes);
+        return frames;
+    }
+
+    // lossless frame data
+    assert(frameTag.IsUInt16Array());
+    frames.Data(frameTag.ToUInt16Array());
+    return frames;
+}
+
+// Fetches frame data for 'tag' and returns it in the requested orientation.
+//
+// Pulse tags requested with PulseBehavior::BASECALLS_ONLY have their squashed
+// pulses removed first. When 'aligned' and/or 'exciseSoftClips' is set, the
+// data are brought into genomic orientation and clipped/gapped against the
+// CIGAR; this throws std::runtime_error for pulse tags unless BASECALLS_ONLY
+// was requested.
+Frames BamRecord::FetchFrames(const BamRecordTag tag,
+                              const Orientation orientation,
+                              const bool aligned,
+                              const bool exciseSoftClips,
+                              const PulseBehavior pulseBehavior) const
+{
+    const bool isPulse       = internal::BamRecordTags::IsPulse(tag);
+    const bool basecallsOnly = (pulseBehavior == PulseBehavior::BASECALLS_ONLY);
+    const bool usesCigar     = (aligned || exciseSoftClips);
+
+    // fetch raw - tags are stored in native orientation
+    Frames frames = FetchFramesRaw(tag);
+    Orientation current = Orientation::NATIVE;
+
+    // maybe strip 'squashed' pulse loci
+    if (isPulse && basecallsOnly) {
+        CalculatePulse2BaseCache();
+        frames.DataRaw() = p2bCache_->RemoveSquashedPulses(frames.Data());
+    }
+
+    if (usesCigar) {
+
+        if (isPulse && !basecallsOnly)
+            throw std::runtime_error("Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
+                                     "Use PulseBehavior::BASECALLS_ONLY instead.");
+
+        // CIGAR operations apply to genomic orientation
+        internal::OrientTagDataAsRequested(&frames, current, Orientation::GENOMIC,
+                                           impl_.IsReverseStrand());
+        current = Orientation::GENOMIC;
+
+        internal::ClipAndGapifyFrames(impl_, aligned, exciseSoftClips, &frames);
+    }
+
+    // hand back in the orientation the caller asked for
+    internal::OrientTagDataAsRequested(&frames, current, orientation,
+                                       impl_.IsReverseStrand());
+    return frames;
+}
+
+// Returns the photon values stored under 'tag': each stored uint16_t is
+// divided by photonFactor. A null tag yields an empty vector; a tag that is
+// not a uint16_t array throws std::runtime_error.
+vector<float> BamRecord::FetchPhotonsRaw(const BamRecordTag tag) const
+{
+    const Tag& photonTag = impl_.TagValue(tag);
+    if (photonTag.IsNull())
+        return vector<float>();
+
+    if(!photonTag.IsUInt16Array())
+        throw std::runtime_error("Photons are not a uint16_t array, tag " +
+                                 internal::BamRecordTags::LabelFor(tag));
+
+    const vector<uint16_t> stored = photonTag.ToUInt16Array();
+    const size_t numValues = stored.size();
+
+    vector<float> photons;
+    photons.reserve(numValues);
+    for (size_t i = 0; i < numValues; ++i)
+        photons.emplace_back(stored[i] / photonFactor);
+    return photons;
+}
+
+// Fetches photon data for 'tag' and returns it in the requested orientation.
+//
+// Pulse tags requested with PulseBehavior::BASECALLS_ONLY have their squashed
+// pulses removed first. When 'aligned' and/or 'exciseSoftClips' is set, the
+// data are brought into genomic orientation and clipped/gapped against the
+// CIGAR; this throws std::runtime_error for pulse tags unless BASECALLS_ONLY
+// was requested.
+vector<float> BamRecord::FetchPhotons(const BamRecordTag tag,
+                                      const Orientation orientation,
+                                      const bool aligned,
+                                      const bool exciseSoftClips,
+                                      const PulseBehavior pulseBehavior) const
+{
+    const bool isPulse       = internal::BamRecordTags::IsPulse(tag);
+    const bool basecallsOnly = (pulseBehavior == PulseBehavior::BASECALLS_ONLY);
+    const bool usesCigar     = (aligned || exciseSoftClips);
+
+    // fetch raw - tags are stored in native orientation
+    auto data = FetchPhotonsRaw(tag);
+    Orientation current = Orientation::NATIVE;
+
+    // maybe strip 'squashed' pulse loci
+    if (isPulse && basecallsOnly) {
+        CalculatePulse2BaseCache();
+        data = p2bCache_->RemoveSquashedPulses(data);
+    }
+
+    if (usesCigar) {
+
+        if (isPulse && !basecallsOnly)
+            throw std::runtime_error("Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
+                                     "Use PulseBehavior::BASECALLS_ONLY instead.");
+
+        // CIGAR operations apply to genomic orientation
+        internal::OrientTagDataAsRequested(&data, current, Orientation::GENOMIC,
+                                           impl_.IsReverseStrand());
+        current = Orientation::GENOMIC;
+
+        internal::ClipAndGapifyPhotons(impl_, aligned, exciseSoftClips, &data);
+    }
+
+    // hand back in the orientation the caller asked for
+    internal::OrientTagDataAsRequested(&data, current, orientation,
+                                       impl_.IsReverseStrand());
+    return data;
+}
+
+// Returns the value of 'tag', read as a FASTQ string and converted to
+// QualityValues.
+QualityValues BamRecord::FetchQualitiesRaw(const BamRecordTag tag) const
+{
+    return QualityValues::FromFastq(impl_.TagValue(tag).ToString());
+}
+
+// Fetches quality values for 'tag' (QUAL itself, or a tag) and returns them
+// in the requested orientation.
+//
+// Pulse tags requested with PulseBehavior::BASECALLS_ONLY have their squashed
+// pulses removed first. When 'aligned' and/or 'exciseSoftClips' is set, the
+// data are brought into genomic orientation and clipped/gapped against the
+// CIGAR; this throws std::runtime_error for pulse tags unless BASECALLS_ONLY
+// was requested.
+QualityValues BamRecord::FetchQualities(const BamRecordTag tag,
+                                        const Orientation orientation,
+                                        const bool aligned,
+                                        const bool exciseSoftClips,
+                                        const PulseBehavior pulseBehavior) const
+{
+    const bool isPulse       = internal::BamRecordTags::IsPulse(tag);
+    const bool basecallsOnly = (pulseBehavior == PulseBehavior::BASECALLS_ONLY);
+    const bool usesCigar     = (aligned || exciseSoftClips);
+
+    // fetch raw: QUAL is stored in genomic orientation, all tags in native
+    QualityValues quals;
+    Orientation current = Orientation::NATIVE;
+    if (tag == BamRecordTag::QUAL) {
+        quals = impl_.Qualities();
+        current = Orientation::GENOMIC;
+    } else {
+        quals = FetchQualitiesRaw(tag);
+    }
+
+    // maybe strip 'squashed' pulse loci
+    if (isPulse && basecallsOnly) {
+        CalculatePulse2BaseCache();
+        quals = p2bCache_->RemoveSquashedPulses(quals);
+    }
+
+    if (usesCigar) {
+
+        if (isPulse && !basecallsOnly)
+            throw std::runtime_error("Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
+                                     "Use PulseBehavior::BASECALLS_ONLY instead.");
+
+        // CIGAR operations apply to genomic orientation
+        internal::OrientTagDataAsRequested(&quals, current, Orientation::GENOMIC,
+                                           impl_.IsReverseStrand());
+        current = Orientation::GENOMIC;
+
+        internal::ClipAndGapifyQualities(impl_, aligned, exciseSoftClips, &quals);
+    }
+
+    // hand back in the orientation the caller asked for
+    internal::OrientTagDataAsRequested(&quals, current, orientation,
+                                       impl_.IsReverseStrand());
+    return quals;
+}
+
+// Returns the uint32_t array stored under 'tag'. A null tag yields an empty
+// vector; a tag of any other type throws std::runtime_error.
+vector<uint32_t> BamRecord::FetchUIntsRaw(const BamRecordTag tag) const
+{
+    const Tag& arrayTag = impl_.TagValue(tag);
+    if (arrayTag.IsNull())
+        return std::vector<uint32_t>();
+
+    if(arrayTag.IsUInt32Array())
+        return arrayTag.ToUInt32Array();
+
+    throw std::runtime_error("Tag data are not a uint32_t array, tag " +
+                             internal::BamRecordTags::LabelFor(tag));
+}
+
+// Fetches uint32_t array data for 'tag' and returns it in the requested
+// orientation.
+//
+// Pulse tags requested with PulseBehavior::BASECALLS_ONLY have their squashed
+// pulses removed first. When 'aligned' and/or 'exciseSoftClips' is set, the
+// data are brought into genomic orientation and clipped/gapped against the
+// CIGAR; this throws std::runtime_error for pulse tags unless BASECALLS_ONLY
+// was requested.
+vector<uint32_t> BamRecord::FetchUInts(const BamRecordTag tag,
+                                       const Orientation orientation,
+                                       const bool aligned,
+                                       const bool exciseSoftClips,
+                                       const PulseBehavior pulseBehavior) const
+{
+    const bool isPulse       = internal::BamRecordTags::IsPulse(tag);
+    const bool basecallsOnly = (pulseBehavior == PulseBehavior::BASECALLS_ONLY);
+    const bool usesCigar     = (aligned || exciseSoftClips);
+
+    // fetch raw - tags are stored in native orientation
+    auto arr = FetchUIntsRaw(tag);
+    Orientation current = Orientation::NATIVE;
+
+    // maybe strip 'squashed' pulse loci
+    if (isPulse && basecallsOnly) {
+        CalculatePulse2BaseCache();
+        arr = p2bCache_->RemoveSquashedPulses(arr);
+    }
+
+    if (usesCigar) {
+
+        if (isPulse && !basecallsOnly)
+            throw std::runtime_error("Cannot return data at all pulses when gapping and/or soft-clipping are requested. "
+                                     "Use PulseBehavior::BASECALLS_ONLY instead.");
+
+        // CIGAR operations apply to genomic orientation
+        internal::OrientTagDataAsRequested(&arr, current, Orientation::GENOMIC,
+                                           impl_.IsReverseStrand());
+        current = Orientation::GENOMIC;
+
+        internal::ClipAndGapifyUInts(impl_, aligned, exciseSoftClips, &arr);
+    }
+
+    // hand back in the orientation the caller asked for
+    internal::OrientTagDataAsRequested(&arr, current, orientation,
+                                       impl_.IsReverseStrand());
+    return arr;
+}
+
+// Returns the record name as stored on the underlying record.
+string BamRecord::FullName(void) const
+{
+    return impl_.Name();
+}
+
+// True when the record has an ALT_LABEL_QV tag.
+bool BamRecord::HasAltLabelQV(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::ALT_LABEL_QV);
+    return present;
+}
+
+// True when the record has an ALT_LABEL_TAG tag.
+bool BamRecord::HasAltLabelTag(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::ALT_LABEL_TAG);
+    return present;
+}
+
+// True when the record has a BARCODES tag.
+bool BamRecord::HasBarcodes(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::BARCODES);
+    return present;
+}
+
+// True when the record has a BARCODE_QUALITY tag.
+bool BamRecord::HasBarcodeQuality(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::BARCODE_QUALITY);
+    return present;
+}
+
+// True when the record has a LABEL_QV tag.
+bool BamRecord::HasLabelQV(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::LABEL_QV);
+    return present;
+}
+
+// True when the record has a DELETION_QV tag.
+bool BamRecord::HasDeletionQV(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::DELETION_QV);
+    return present;
+}
+
+// True when the record has a DELETION_TAG tag.
+bool BamRecord::HasDeletionTag(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::DELETION_TAG);
+    return present;
+}
+
+// True when the record has a HOLE_NUMBER tag whose value is not null.
+bool BamRecord::HasHoleNumber(void) const
+{
+    if (!impl_.HasTag(BamRecordTag::HOLE_NUMBER))
+        return false;
+    return !impl_.TagValue(BamRecordTag::HOLE_NUMBER).IsNull();
+}
+
+// True when the record has an INSERTION_QV tag.
+bool BamRecord::HasInsertionQV(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::INSERTION_QV);
+    return present;
+}
+
+// True when the record has a NUM_PASSES tag.
+bool BamRecord::HasNumPasses(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::NUM_PASSES);
+    return present;
+}
+
+// Alias for HasIPD().
+bool BamRecord::HasPreBaseFrames(void) const
+{
+    const bool present = HasIPD();
+    return present;
+}
+
+// True when the record has an IPD tag.
+bool BamRecord::HasIPD(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::IPD);
+    return present;
+}
+
+// True when the record has a CONTEXT_FLAGS tag.
+bool BamRecord::HasLocalContextFlags(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::CONTEXT_FLAGS);
+    return present;
+}
+
+// True when the record has a MERGE_QV tag.
+bool BamRecord::HasMergeQV(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::MERGE_QV);
+    return present;
+}
+
+// True when the record has a PULSE_MERGE_QV tag.
+bool BamRecord::HasPulseMergeQV(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::PULSE_MERGE_QV);
+    return present;
+}
+
+// True when the record has a PKMEAN tag.
+bool BamRecord::HasPkmean(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::PKMEAN);
+    return present;
+}
+
+// True when the record has a PKMEAN_2 tag.
+bool BamRecord::HasPkmean2(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::PKMEAN_2);
+    return present;
+}
+
+// True when the record has a PKMID tag.
+bool BamRecord::HasPkmid(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::PKMID);
+    return present;
+}
+
+// True when the record has a PKMID_2 tag.
+bool BamRecord::HasPkmid2(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::PKMID_2);
+    return present;
+}
+
+// True when the record has a PRE_PULSE_FRAMES tag.
+bool BamRecord::HasPrePulseFrames(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::PRE_PULSE_FRAMES);
+    return present;
+}
+
+// True when the record has a PULSE_CALL tag whose value is not null.
+bool BamRecord::HasPulseCall(void) const
+{
+    if (!impl_.HasTag(BamRecordTag::PULSE_CALL))
+        return false;
+    return !impl_.TagValue(BamRecordTag::PULSE_CALL).IsNull();
+}
+
+// True when the record has a PULSE_CALL_WIDTH tag.
+bool BamRecord::HasPulseCallWidth(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::PULSE_CALL_WIDTH);
+    return present;
+}
+
+// True when the record has a PULSE_WIDTH tag.
+bool BamRecord::HasPulseWidth(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::PULSE_WIDTH);
+    return present;
+}
+
+// True when the record has a QUERY_END tag.
+bool BamRecord::HasQueryEnd(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::QUERY_END);
+    return present;
+}
+
+// True when the record has a QUERY_START tag.
+bool BamRecord::HasQueryStart(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::QUERY_START);
+    return present;
+}
+
+// True when the record has a READ_ACCURACY tag whose value is not null.
+bool BamRecord::HasReadAccuracy(void) const
+{
+    if (!impl_.HasTag(BamRecordTag::READ_ACCURACY))
+        return false;
+    return !impl_.TagValue(BamRecordTag::READ_ACCURACY).IsNull();
+}
+
+// True when the record has a SCRAP_REGION_TYPE tag whose value is not null.
+bool BamRecord::HasScrapRegionType(void) const
+{
+    if (!impl_.HasTag(BamRecordTag::SCRAP_REGION_TYPE))
+        return false;
+    return !impl_.TagValue(BamRecordTag::SCRAP_REGION_TYPE).IsNull();
+}
+
+// True when the record has a SCRAP_ZMW_TYPE tag whose value is not null.
+bool BamRecord::HasScrapZmwType(void) const
+{
+    if (!impl_.HasTag(BamRecordTag::SCRAP_ZMW_TYPE))
+        return false;
+    return !impl_.TagValue(BamRecordTag::SCRAP_ZMW_TYPE).IsNull();
+}
+
+// True when the record has a START_FRAME tag.
+bool BamRecord::HasStartFrame(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::START_FRAME);
+    return present;
+}
+
+// True when the record has an SNR tag.
+bool BamRecord::HasSignalToNoise(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::SNR);
+    return present;
+}
+
+// True when the record has a SUBSTITUTION_QV tag.
+bool BamRecord::HasSubstitutionQV(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::SUBSTITUTION_QV);
+    return present;
+}
+
+// True when the record has a SUBSTITUTION_TAG tag.
+bool BamRecord::HasSubstitutionTag(void) const
+{
+    const bool present = impl_.HasTag(BamRecordTag::SUBSTITUTION_TAG);
+    return present;
+}
+
+// Returns the header associated with this record (by value).
+BamHeader BamRecord::Header(void) const
+{
+    return header_;
+}
+
+// Returns the hole number from the HOLE_NUMBER tag; when that tag value is
+// null, falls back to extracting it from the record name.
+int32_t BamRecord::HoleNumber(void) const
+{
+    const Tag& holeNumber = impl_.TagValue(BamRecordTag::HOLE_NUMBER);
+
+    // missing zm tag - try to pull from name
+    if (holeNumber.IsNull())
+        return internal::HoleNumberFromName(FullName());
+
+    return holeNumber.ToInt32();
+}
+
+// Creates or overwrites the HOLE_NUMBER tag with the given value.
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::HoleNumber(const int32_t holeNumber)
+{
+    internal::CreateOrEdit(BamRecordTag::HOLE_NUMBER, holeNumber, &impl_);
+    return *this;
+}
+
+// Mutable access to the wrapped BamRecordImpl.
+BamRecordImpl& BamRecord::Impl(void)
+{
+    return impl_;
+}
+
+// Read-only access to the wrapped BamRecordImpl.
+const BamRecordImpl& BamRecord::Impl(void) const
+{
+    return impl_;
+}
+
+// Forwards to FetchQualities() for the INSERTION_QV tag with the requested
+// orientation / aligned / exciseSoftClips settings.
+QualityValues BamRecord::InsertionQV(Orientation orientation,
+                                     bool aligned,
+                                     bool exciseSoftClips) const
+{
+    const BamRecordTag tag = BamRecordTag::INSERTION_QV;
+    return FetchQualities(tag, orientation, aligned, exciseSoftClips);
+}
+
+// Creates or overwrites the INSERTION_QV tag with the Fastq() form of the
+// given qualities. Returns *this to allow call chaining.
+BamRecord& BamRecord::InsertionQV(const QualityValues& insertionQVs)
+{
+    internal::CreateOrEdit(BamRecordTag::INSERTION_QV, insertionQVs.Fastq(), &impl_);
+    return *this;
+}
+
+// Forwards to FetchFrames() for the IPD tag with the requested
+// orientation / aligned / exciseSoftClips settings.
+Frames BamRecord::IPD(Orientation orientation,
+                      bool aligned,
+                      bool exciseSoftClips) const
+{
+    const BamRecordTag tag = BamRecordTag::IPD;
+    return FetchFrames(tag, orientation, aligned, exciseSoftClips);
+}
+
+// Creates or overwrites the IPD tag. With LOSSY encoding the value stored is
+// frames.Encode(); with any other encoding it is frames.Data().
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::IPD(const Frames& frames,
+                          const FrameEncodingType encoding)
+{
+    const bool lossy = (encoding == FrameEncodingType::LOSSY);
+    if (!lossy) {
+        internal::CreateOrEdit(BamRecordTag::IPD, frames.Data(), &impl_);
+    } else {
+        internal::CreateOrEdit(BamRecordTag::IPD, frames.Encode(), &impl_);
+    }
+    return *this;
+}
+
+// Returns the IPD tag's stored values without decoding them: a uint8 array
+// (lossy frame codes) is widened element-by-element to uint16, a uint16 array
+// (lossless data) is used directly. A default-constructed Frames is returned
+// when the tag is null. The data is then re-oriented from NATIVE to the
+// requested orientation.
+Frames BamRecord::IPDRaw(Orientation orientation) const
+{
+    Frames frames;
+    const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::IPD);
+    const Tag& frameTag = impl_.TagValue(tagName);
+    if (frameTag.IsNull())
+        return frames;
+
+    // lossy frame codes
+    if (frameTag.IsUInt8Array()) {
+        const vector<uint8_t> codes = frameTag.ToUInt8Array();
+        // deliberately non-const: std::move on a const vector silently copies
+        vector<uint16_t> codes16(codes.begin(), codes.end());
+        frames.Data(std::move(codes16));
+    }
+
+    // lossless frame data
+    else {
+        assert(frameTag.IsUInt16Array());
+        frames.Data(frameTag.ToUInt16Array());
+    }
+
+    // return in requested orientation
+    internal::OrientTagDataAsRequested(&frames,
+                                       Orientation::NATIVE,   // current
+                                       orientation,           // requested
+                                       impl_.IsReverseStrand());
+    return frames;
+}
+
+// Reports the mapped state of the wrapped BamRecordImpl.
+bool BamRecord::IsMapped(void) const
+{
+    const bool mapped = impl_.IsMapped();
+    return mapped;
+}
+
+// Forwards to FetchQualities() for the LABEL_QV tag with the requested
+// orientation / aligned / exciseSoftClips / pulseBehavior settings.
+QualityValues BamRecord::LabelQV(Orientation orientation,
+                                 bool aligned,
+                                 bool exciseSoftClips,
+                                 PulseBehavior pulseBehavior) const
+{
+    const BamRecordTag tag = BamRecordTag::LABEL_QV;
+    return FetchQualities(tag, orientation, aligned, exciseSoftClips, pulseBehavior);
+}
+
+// Creates or overwrites the LABEL_QV tag with the Fastq() form of the given
+// qualities. Returns *this to allow call chaining.
+BamRecord& BamRecord::LabelQV(const QualityValues& labelQVs)
+{
+    internal::CreateOrEdit(BamRecordTag::LABEL_QV, labelQVs.Fastq(), &impl_);
+    return *this;
+}
+
+// Reads the CONTEXT_FLAGS tag as a uint8 and converts it to the
+// LocalContextFlags enum type. No null check is made on the tag here.
+LocalContextFlags BamRecord::LocalContextFlags(void) const
+{
+    const auto label = internal::BamRecordTags::LabelFor(BamRecordTag::CONTEXT_FLAGS);
+    const uint8_t rawFlags = impl_.TagValue(label).ToUInt8();
+    return static_cast<PacBio::BAM::LocalContextFlags>(rawFlags);
+}
+
+// Creates or overwrites the CONTEXT_FLAGS tag with the flags cast to uint8.
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::LocalContextFlags(const PacBio::BAM::LocalContextFlags flags)
+{
+    const uint8_t rawFlags = static_cast<uint8_t>(flags);
+    internal::CreateOrEdit(BamRecordTag::CONTEXT_FLAGS, rawFlags, &impl_);
+    return *this;
+}
+
+// Marks this record as mapped: stores position, reference ID, CIGAR and
+// mapping quality, sets the mapped flag and the strand flag. For a reverse
+// strand mapping the stored sequence is reverse-complemented and the stored
+// qualities are reversed. Cached aligned start/end are reset afterwards.
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::Map(const int32_t referenceId,
+                          const Position refStart,
+                          const Strand strand,
+                          const Cigar& cigar,
+                          const uint8_t mappingQuality)
+{
+    impl_.Position(refStart);
+    impl_.ReferenceId(referenceId);
+    impl_.CigarData(cigar);
+    impl_.MapQuality(mappingQuality);
+    impl_.SetMapped(true);
+
+    const bool onReverseStrand = (strand != Strand::FORWARD);
+    assert(!onReverseStrand || strand == Strand::REVERSE);
+    impl_.SetReverseStrand(onReverseStrand);
+
+    if (onReverseStrand) {
+        // switch seq & qual
+        string bases = impl_.Sequence();
+        QualityValues quals = impl_.Qualities();
+
+        internal::ReverseComplement(bases);
+        internal::Reverse(quals);
+
+        impl_.SetSequenceAndQualities(bases, quals.Fastq());
+    }
+
+    // reset any cached aligned start/end
+    alignedEnd_   = PacBio::BAM::UnmappedPosition;
+    alignedStart_ = PacBio::BAM::UnmappedPosition;
+
+    return *this;
+}
+
+// Returns the mapping quality stored in the wrapped BamRecordImpl.
+uint8_t BamRecord::MapQuality(void) const
+{
+    const uint8_t mapQ = impl_.MapQuality();
+    return mapQ;
+}
+
+// Forwards to FetchQualities() for the MERGE_QV tag with the requested
+// orientation / aligned / exciseSoftClips settings.
+QualityValues BamRecord::MergeQV(Orientation orientation,
+                                 bool aligned,
+                                 bool exciseSoftClips) const
+{
+    const BamRecordTag tag = BamRecordTag::MERGE_QV;
+    return FetchQualities(tag, orientation, aligned, exciseSoftClips);
+}
+
+// Creates or overwrites the MERGE_QV tag with the Fastq() form of the given
+// qualities. Returns *this to allow call chaining.
+BamRecord& BamRecord::MergeQV(const QualityValues& mergeQVs)
+{
+    internal::CreateOrEdit(BamRecordTag::MERGE_QV, mergeQVs.Fastq(), &impl_);
+    return *this;
+}
+
+// Movie name as reported by this record's read group.
+string BamRecord::MovieName(void) const
+{
+    return ReadGroup().MovieName();
+}
+
+// Computes (ReferenceEnd - ReferenceStart) minus the number of CIGAR
+// sequence matches and mismatches.
+size_t BamRecord::NumDeletedBases(void) const
+{
+    auto refEnd = ReferenceEnd();
+    auto refStart = ReferenceStart();
+    auto counts = NumMatchesAndMismatches();
+    auto matches = counts.first;
+    auto mismatches = counts.second;
+    return (refEnd - refStart - matches - mismatches);
+}
+
+// Computes (AlignedEnd - AlignedStart) minus the number of CIGAR
+// sequence matches and mismatches.
+size_t BamRecord::NumInsertedBases(void) const
+{
+    auto queryEnd = AlignedEnd();
+    auto queryStart = AlignedStart();
+    auto counts = NumMatchesAndMismatches();
+    auto matches = counts.first;
+    auto mismatches = counts.second;
+    return (queryEnd - queryStart - matches - mismatches);
+}
+
+// Number of bases covered by SEQUENCE_MATCH CIGAR operations.
+size_t BamRecord::NumMatches(void) const
+{
+    const pair<size_t, size_t> counts = NumMatchesAndMismatches();
+    return counts.first;
+}
+
+// Walks the raw CIGAR and sums operation lengths: 'first' accumulates
+// SEQUENCE_MATCH lengths, 'second' accumulates SEQUENCE_MISMATCH lengths.
+// All other operation types are ignored.
+pair<size_t, size_t> BamRecord::NumMatchesAndMismatches(void) const
+{
+    size_t matches = 0;
+    size_t mismatches = 0;
+
+    PBBAM_SHARED_PTR<bam1_t> raw = internal::BamRecordMemory::GetRawData(this);
+    uint32_t* ops = bam_get_cigar(raw.get());
+    for (uint32_t opIndex = 0; opIndex < raw->core.n_cigar; ++opIndex) {
+        const uint32_t encodedOp = ops[opIndex];
+        switch (static_cast<CigarOperationType>(bam_cigar_op(encodedOp))) {
+            case CigarOperationType::SEQUENCE_MATCH:
+                matches += bam_cigar_oplen(encodedOp);
+                break;
+            case CigarOperationType::SEQUENCE_MISMATCH:
+                mismatches += bam_cigar_oplen(encodedOp);
+                break;
+            default:
+                break;
+        }
+    }
+    return make_pair(matches, mismatches);
+}
+
+// Number of bases covered by SEQUENCE_MISMATCH CIGAR operations.
+size_t BamRecord::NumMismatches(void) const
+{
+    const pair<size_t, size_t> counts = NumMatchesAndMismatches();
+    return counts.second;
+}
+
+// Reads the NUM_PASSES tag as an int32. No null check is made on the tag.
+int32_t BamRecord::NumPasses(void) const
+{
+    const auto label = internal::BamRecordTags::LabelFor(BamRecordTag::NUM_PASSES);
+    return impl_.TagValue(label).ToInt32();
+}
+
+// Creates or overwrites the NUM_PASSES tag with the given value.
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::NumPasses(const int32_t numPasses)
+{
+    internal::CreateOrEdit(BamRecordTag::NUM_PASSES, numPasses, &impl_);
+    return *this;
+}
+
+// Forwards to FetchPhotons() for the PKMEAN tag with the requested
+// orientation / aligned / exciseSoftClips / pulseBehavior settings.
+vector<float> BamRecord::Pkmean(Orientation orientation,
+                                bool aligned,
+                                bool exciseSoftClips,
+                                PulseBehavior pulseBehavior) const
+{
+    const BamRecordTag tag = BamRecordTag::PKMEAN;
+    return FetchPhotons(tag, orientation, aligned, exciseSoftClips, pulseBehavior);
+}
+
+// Encodes the photon values with EncodePhotons() and stores them through the
+// encoded-value overload. Returns *this to allow call chaining.
+BamRecord& BamRecord::Pkmean(const vector<float>& photons)
+{
+    return Pkmean(EncodePhotons(photons));
+}
+
+// Creates or overwrites the PKMEAN tag with already-encoded photon values.
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::Pkmean(const vector<uint16_t>& encodedPhotons)
+{
+    internal::CreateOrEdit(BamRecordTag::PKMEAN, encodedPhotons, &impl_);
+    return *this;
+}
+
+// Forwards to FetchPhotons() for the PKMID tag with the requested
+// orientation / aligned / exciseSoftClips / pulseBehavior settings.
+vector<float> BamRecord::Pkmid(Orientation orientation,
+                               bool aligned,
+                               bool exciseSoftClips,
+                               PulseBehavior pulseBehavior) const
+{
+    const BamRecordTag tag = BamRecordTag::PKMID;
+    return FetchPhotons(tag, orientation, aligned, exciseSoftClips, pulseBehavior);
+}
+
+// Encodes the photon values with EncodePhotons() and stores them through the
+// encoded-value overload. Returns *this to allow call chaining.
+BamRecord& BamRecord::Pkmid(const vector<float>& photons)
+{
+    return Pkmid(EncodePhotons(photons));
+}
+
+// Creates or overwrites the PKMID tag with already-encoded photon values.
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::Pkmid(const vector<uint16_t>& encodedPhotons)
+{
+    internal::CreateOrEdit(BamRecordTag::PKMID, encodedPhotons, &impl_);
+    return *this;
+}
+
+// Forwards to FetchPhotons() for the PKMEAN_2 tag with the requested
+// orientation / aligned / exciseSoftClips / pulseBehavior settings.
+vector<float> BamRecord::Pkmean2(Orientation orientation,
+                                 bool aligned,
+                                 bool exciseSoftClips,
+                                 PulseBehavior pulseBehavior) const
+{
+    const BamRecordTag tag = BamRecordTag::PKMEAN_2;
+    return FetchPhotons(tag, orientation, aligned, exciseSoftClips, pulseBehavior);
+}
+
+// Encodes the photon values with EncodePhotons() and stores them through the
+// encoded-value overload. Returns *this to allow call chaining.
+BamRecord& BamRecord::Pkmean2(const vector<float>& photons)
+{
+    return Pkmean2(EncodePhotons(photons));
+}
+
+// Creates or overwrites the PKMEAN_2 tag with already-encoded photon values.
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::Pkmean2(const vector<uint16_t>& encodedPhotons)
+{
+    internal::CreateOrEdit(BamRecordTag::PKMEAN_2, encodedPhotons, &impl_);
+    return *this;
+}
+
+// Forwards to FetchPhotons() for the PKMID_2 tag with the requested
+// orientation / aligned / exciseSoftClips / pulseBehavior settings.
+vector<float> BamRecord::Pkmid2(Orientation orientation,
+                                bool aligned,
+                                bool exciseSoftClips,
+                                PulseBehavior pulseBehavior) const
+{
+    const BamRecordTag tag = BamRecordTag::PKMID_2;
+    return FetchPhotons(tag, orientation, aligned, exciseSoftClips, pulseBehavior);
+}
+
+// Encodes the photon values with EncodePhotons() and stores them through the
+// encoded-value overload. Returns *this to allow call chaining.
+BamRecord& BamRecord::Pkmid2(const vector<float>& photons)
+{
+    return Pkmid2(EncodePhotons(photons));
+}
+
+// Creates or overwrites the PKMID_2 tag with already-encoded photon values.
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::Pkmid2(const vector<uint16_t>& encodedPhotons)
+{
+    internal::CreateOrEdit(BamRecordTag::PKMID_2, encodedPhotons, &impl_);
+    return *this;
+}
+
+// Alias for IPD(orientation, aligned, exciseSoftClips).
+Frames BamRecord::PreBaseFrames(Orientation orientation,
+                                bool aligned,
+                                bool exciseSoftClips) const
+{
+    return IPD(orientation, aligned, exciseSoftClips);
+}
+
+// Alias for IPD(frames, encoding).
+BamRecord& BamRecord::PreBaseFrames(const Frames& frames,
+                                    const FrameEncodingType encoding)
+{
+    return IPD(frames, encoding);
+}
+
+// Forwards to FetchFrames() for the PRE_PULSE_FRAMES tag with the requested
+// orientation / aligned / exciseSoftClips / pulseBehavior settings.
+Frames BamRecord::PrePulseFrames(Orientation orientation,
+                                 bool aligned,
+                                 bool exciseSoftClips,
+                                 PulseBehavior pulseBehavior) const
+{
+    const BamRecordTag tag = BamRecordTag::PRE_PULSE_FRAMES;
+    return FetchFrames(tag, orientation, aligned, exciseSoftClips, pulseBehavior);
+}
+
+// Creates or overwrites the PRE_PULSE_FRAMES tag. With LOSSY encoding the
+// value stored is frames.Encode(); otherwise it is frames.Data().
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::PrePulseFrames(const Frames& frames,
+                                     const FrameEncodingType encoding)
+{
+    const bool lossy = (encoding == FrameEncodingType::LOSSY);
+    if (!lossy)
+        internal::CreateOrEdit(BamRecordTag::PRE_PULSE_FRAMES, frames.Data(), &impl_);
+    else
+        internal::CreateOrEdit(BamRecordTag::PRE_PULSE_FRAMES, frames.Encode(), &impl_);
+    return *this;
+}
+
+// Returns the PULSE_WIDTH tag's stored values without decoding them: a uint8
+// array (lossy frame codes) is widened element-by-element to uint16, a uint16
+// array (lossless data) is used directly. A default-constructed Frames is
+// returned when the tag is null. The data is then re-oriented from NATIVE to
+// the requested orientation.
+// Note: 'aligned' and 'exciseSoftClips' are accepted but not used here.
+Frames BamRecord::PulseWidthRaw(Orientation orientation,
+                                bool aligned,
+                                bool exciseSoftClips) const
+{
+    Frames frames;
+    const auto tagName = internal::BamRecordTags::LabelFor(BamRecordTag::PULSE_WIDTH);
+    const Tag& frameTag = impl_.TagValue(tagName);
+    if (frameTag.IsNull())
+        return frames;
+
+    // lossy frame codes
+    if (frameTag.IsUInt8Array()) {
+        const vector<uint8_t> codes = frameTag.ToUInt8Array();
+        // deliberately non-const: std::move on a const vector silently copies
+        vector<uint16_t> codes16(codes.begin(), codes.end());
+        frames.Data(std::move(codes16));
+    }
+
+    // lossless frame data
+    else {
+        assert(frameTag.IsUInt16Array());
+        frames.Data(frameTag.ToUInt16Array());
+    }
+
+    // return in requested orientation
+    internal::OrientTagDataAsRequested(&frames,
+                                       Orientation::NATIVE,   // current
+                                       orientation,           // requested
+                                       impl_.IsReverseStrand());
+    return frames;
+}
+
+
+// Forwards to FetchQualities() for the PULSE_MERGE_QV tag with the requested
+// orientation / aligned / exciseSoftClips / pulseBehavior settings.
+QualityValues BamRecord::PulseMergeQV(Orientation orientation,
+                                      bool aligned,
+                                      bool exciseSoftClips,
+                                      PulseBehavior pulseBehavior) const
+{
+    const BamRecordTag tag = BamRecordTag::PULSE_MERGE_QV;
+    return FetchQualities(tag, orientation, aligned, exciseSoftClips, pulseBehavior);
+}
+
+// Creates or overwrites the PULSE_MERGE_QV tag with the Fastq() form of the
+// given qualities. Returns *this to allow call chaining.
+BamRecord& BamRecord::PulseMergeQV(const QualityValues& mergeQVs)
+{
+    internal::CreateOrEdit(BamRecordTag::PULSE_MERGE_QV, mergeQVs.Fastq(), &impl_);
+    return *this;
+}
+
+
+// Forwards to FetchBases() for the PULSE_CALL tag with the requested
+// orientation / aligned / exciseSoftClips / pulseBehavior settings.
+string BamRecord::PulseCall(Orientation orientation,
+                            bool aligned,
+                            bool exciseSoftClips,
+                            PulseBehavior pulseBehavior) const
+{
+    const BamRecordTag tag = BamRecordTag::PULSE_CALL;
+    return FetchBases(tag, orientation, aligned, exciseSoftClips, pulseBehavior);
+}
+
+// Creates or overwrites the PULSE_CALL tag with the given string.
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::PulseCall(const string& tags)
+{
+    internal::CreateOrEdit(BamRecordTag::PULSE_CALL, tags, &impl_);
+    return *this;
+}
+
+// Forwards to FetchFrames() for the PULSE_CALL_WIDTH tag with the requested
+// orientation / aligned / exciseSoftClips / pulseBehavior settings.
+Frames BamRecord::PulseCallWidth(Orientation orientation,
+                                 bool aligned,
+                                 bool exciseSoftClips,
+                                 PulseBehavior pulseBehavior) const
+{
+    const BamRecordTag tag = BamRecordTag::PULSE_CALL_WIDTH;
+    return FetchFrames(tag, orientation, aligned, exciseSoftClips, pulseBehavior);
+}
+
+// Creates or overwrites the PULSE_CALL_WIDTH tag. With LOSSY encoding the
+// value stored is frames.Encode(); otherwise it is frames.Data().
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::PulseCallWidth(const Frames& frames,
+                                     const FrameEncodingType encoding)
+{
+    const bool lossy = (encoding == FrameEncodingType::LOSSY);
+    if (!lossy)
+        internal::CreateOrEdit(BamRecordTag::PULSE_CALL_WIDTH, frames.Data(), &impl_);
+    else
+        internal::CreateOrEdit(BamRecordTag::PULSE_CALL_WIDTH, frames.Encode(), &impl_);
+    return *this;
+}
+
+// Forwards to FetchFrames() for the PULSE_WIDTH tag with the requested
+// orientation / aligned / exciseSoftClips settings; the pulse behavior is
+// always PulseBehavior::ALL.
+Frames BamRecord::PulseWidth(Orientation orientation,
+                             bool aligned,
+                             bool exciseSoftClips) const
+{
+    const BamRecordTag tag = BamRecordTag::PULSE_WIDTH;
+    return FetchFrames(tag, orientation, aligned, exciseSoftClips, PulseBehavior::ALL);
+}
+
+// Creates or overwrites the PULSE_WIDTH tag. With LOSSY encoding the value
+// stored is frames.Encode(); otherwise it is frames.Data().
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::PulseWidth(const Frames& frames,
+                                 const FrameEncodingType encoding)
+{
+    const bool lossy = (encoding == FrameEncodingType::LOSSY);
+    if (!lossy)
+        internal::CreateOrEdit(BamRecordTag::PULSE_WIDTH, frames.Data(), &impl_);
+    else
+        internal::CreateOrEdit(BamRecordTag::PULSE_WIDTH, frames.Encode(), &impl_);
+    return *this;
+}
+
+// Forwards to FetchQualities() for the QUAL field with the requested
+// orientation / aligned / exciseSoftClips settings.
+QualityValues BamRecord::Qualities(Orientation orientation,
+                                   bool aligned,
+                                   bool exciseSoftClips) const
+{
+    const BamRecordTag tag = BamRecordTag::QUAL;
+    return FetchQualities(tag, orientation, aligned, exciseSoftClips);
+}
+
+// Query end position. Resolution order:
+//   1. the QUERY_END tag, when non-null;
+//   2. Position(0) if the record type cannot be determined (Type() throws);
+//   3. throws std::runtime_error for CCS records;
+//   4. the value parsed from the full name, or Position(0) if parsing throws.
+Position BamRecord::QueryEnd(void) const
+{
+    // prefer the tag when it is present
+    const auto label = internal::BamRecordTags::LabelFor(BamRecordTag::QUERY_END);
+    const Tag& endTag = impl_.TagValue(label);
+    if (!endTag.IsNull())
+        return endTag.ToInt32();
+
+    // tag missing: decide by record type
+    RecordType recordType;
+    try {
+        recordType = Type();
+    } catch (std::exception&) {
+        return Position(0);
+    }
+    if (recordType == RecordType::CCS)
+        throw std::runtime_error("no query end for CCS read type");
+
+    // non-CCS: parse from the name, with 0 as the fallback position
+    Position parsed(0);
+    try {
+        parsed = internal::QueryEndFromName(FullName());
+    } catch (std::exception&) {
+        parsed = Position(0);
+    }
+    return parsed;
+}
+
+// Creates or overwrites the QUERY_END tag (stored as int32), then rebuilds
+// the record name via UpdateName(). Returns *this to allow call chaining.
+BamRecord& BamRecord::QueryEnd(const Position pos)
+{
+    const int32_t tagValue = static_cast<int32_t>(pos);
+    internal::CreateOrEdit(BamRecordTag::QUERY_END, tagValue, &impl_);
+    UpdateName();
+    return *this;
+}
+
+// Query start position. Resolution order:
+//   1. the QUERY_START tag, when non-null;
+//   2. Position(0) if the record type cannot be determined (Type() throws);
+//   3. throws std::runtime_error for CCS records;
+//   4. the value parsed from the full name, or Position(0) if parsing throws.
+Position BamRecord::QueryStart(void) const
+{
+    // prefer the tag when it is present
+    const auto label = internal::BamRecordTags::LabelFor(BamRecordTag::QUERY_START);
+    const Tag& startTag = impl_.TagValue(label);
+    if (!startTag.IsNull())
+        return startTag.ToInt32();
+
+    // tag missing: decide by record type
+    RecordType recordType;
+    try {
+        recordType = Type();
+    } catch (std::exception&) {
+        return Position(0);
+    }
+    if (recordType == RecordType::CCS)
+        throw std::runtime_error("no query start for CCS read type");
+
+    // non-CCS: parse from the name, with 0 as the fallback position
+    Position parsed(0);
+    try {
+        parsed = internal::QueryStartFromName(FullName());
+    } catch (std::exception&) {
+        parsed = Position(0);
+    }
+    return parsed;
+}
+
+// Creates or overwrites the QUERY_START tag (stored as int32), then rebuilds
+// the record name via UpdateName(). Returns *this to allow call chaining.
+BamRecord& BamRecord::QueryStart(const Position pos)
+{
+    const int32_t tagValue = static_cast<int32_t>(pos);
+    internal::CreateOrEdit(BamRecordTag::QUERY_START, tagValue, &impl_);
+    UpdateName();
+    return *this;
+}
+
+// Reads the READ_ACCURACY tag as a float and wraps it in an Accuracy.
+// No null check is made on the tag here.
+Accuracy BamRecord::ReadAccuracy(void) const
+{
+    const auto label = internal::BamRecordTags::LabelFor(BamRecordTag::READ_ACCURACY);
+    return Accuracy(impl_.TagValue(label).ToFloat());
+}
+
+// Creates or overwrites the READ_ACCURACY tag with the accuracy cast to
+// float. Returns *this to allow call chaining.
+BamRecord& BamRecord::ReadAccuracy(const Accuracy& accuracy)
+{
+    const float tagValue = static_cast<float>(accuracy);
+    internal::CreateOrEdit(BamRecordTag::READ_ACCURACY, tagValue, &impl_);
+    return *this;
+}
+
+// Looks up this record's read group ID in the stored header.
+ReadGroupInfo BamRecord::ReadGroup(void) const
+{
+    return header_.ReadGroup(ReadGroupId());
+}
+
+// Creates or overwrites the READ_GROUP tag with rg.Id(), then rebuilds the
+// record name via UpdateName(). Returns *this to allow call chaining.
+BamRecord& BamRecord::ReadGroup(const ReadGroupInfo& rg)
+{
+    internal::CreateOrEdit(BamRecordTag::READ_GROUP, rg.Id(), &impl_);
+    UpdateName();
+    return *this;
+}
+
+// Read group ID from the READ_GROUP tag, or an empty string when the tag
+// is null.
+string BamRecord::ReadGroupId(void) const
+{
+    const auto label = internal::BamRecordTags::LabelFor(BamRecordTag::READ_GROUP);
+    const Tag& groupTag = impl_.TagValue(label);
+    if (!groupTag.IsNull())
+        return groupTag.ToString();
+    return string();
+}
+
+// Creates or overwrites the READ_GROUP tag with the given ID, then rebuilds
+// the record name via UpdateName(). Returns *this to allow call chaining.
+BamRecord& BamRecord::ReadGroupId(const string& id)
+{
+    internal::CreateOrEdit(BamRecordTag::READ_GROUP, id, &impl_);
+    UpdateName();
+    return *this;
+}
+
+// Converts this record's read group ID to an integer via
+// ReadGroupInfo::IdToInt().
+int32_t BamRecord::ReadGroupNumericId(void) const
+{
+    return ReadGroupInfo::IdToInt(ReadGroupId());
+}
+
+// Reference end position as computed by htslib's bam_endpos(). Returns
+// UnmappedPosition when the record is unmapped or has no raw data.
+Position BamRecord::ReferenceEnd(void) const
+{
+    if (impl_.IsMapped()) {
+        PBBAM_SHARED_PTR<bam1_t> raw = internal::BamRecordMemory::GetRawData(impl_);
+        if (raw)
+            return bam_endpos(raw.get());
+    }
+    return PacBio::BAM::UnmappedPosition;
+}
+
+// Returns the reference ID stored in the wrapped BamRecordImpl.
+int32_t BamRecord::ReferenceId(void) const
+{
+    const int32_t refId = impl_.ReferenceId();
+    return refId;
+}
+
+// Name of the reference this record is mapped to, looked up in the header by
+// reference ID. Throws std::runtime_error for an unmapped record.
+string BamRecord::ReferenceName(void) const
+{
+    if (!IsMapped())
+        throw std::runtime_error("unmapped record has no associated reference name");
+    return Header().SequenceName(ReferenceId());
+}
+
+// Returns the position stored in the wrapped BamRecordImpl.
+Position BamRecord::ReferenceStart(void) const
+{
+    return impl_.Position();
+}
+
+// Const overload: sets both cached aligned positions back to
+// UnmappedPosition.
+void BamRecord::ResetCachedPositions(void) const
+{
+    alignedStart_ = PacBio::BAM::UnmappedPosition;
+    alignedEnd_   = PacBio::BAM::UnmappedPosition;
+}
+
+// Non-const overload: sets both cached aligned positions back to
+// UnmappedPosition.
+void BamRecord::ResetCachedPositions(void)
+{
+    alignedStart_ = PacBio::BAM::UnmappedPosition;
+    alignedEnd_   = PacBio::BAM::UnmappedPosition;
+}
+
+// Reads the SCRAP_REGION_TYPE tag as a uint8 and maps it to a
+// VirtualRegionType through VirtualRegionTypeMap::ParseChar.
+// No null check is made on the tag here.
+VirtualRegionType BamRecord::ScrapRegionType(void) const
+{
+    const auto label = internal::BamRecordTags::LabelFor(BamRecordTag::SCRAP_REGION_TYPE);
+    const Tag& regionTag = impl_.TagValue(label);
+    return VirtualRegionTypeMap::ParseChar[regionTag.ToUInt8()];
+}
+
+// Creates or overwrites the SCRAP_REGION_TYPE tag with the enum value cast
+// to uint8. Returns *this to allow call chaining.
+BamRecord& BamRecord::ScrapRegionType(const VirtualRegionType type)
+{
+    const uint8_t tagValue = static_cast<uint8_t>(type);
+    internal::CreateOrEdit(BamRecordTag::SCRAP_REGION_TYPE, tagValue, &impl_);
+    return *this;
+}
+
+// Creates or overwrites the SCRAP_REGION_TYPE tag with the given character.
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::ScrapRegionType(const char type)
+{
+    internal::CreateOrEdit(BamRecordTag::SCRAP_REGION_TYPE, type, &impl_);
+    return *this;
+}
+
+// Reads the SCRAP_ZMW_TYPE tag as a uint8 and maps it to a ZmwType through
+// ZmwTypeMap::ParseChar. No null check is made on the tag here.
+ZmwType BamRecord::ScrapZmwType(void) const
+{
+    const auto label = internal::BamRecordTags::LabelFor(BamRecordTag::SCRAP_ZMW_TYPE);
+    const Tag& zmwTag = impl_.TagValue(label);
+    return ZmwTypeMap::ParseChar[zmwTag.ToUInt8()];
+}
+
+// Creates or overwrites the SCRAP_ZMW_TYPE tag with the enum value cast to
+// uint8. Returns *this to allow call chaining.
+BamRecord& BamRecord::ScrapZmwType(const ZmwType type)
+{
+    const uint8_t tagValue = static_cast<uint8_t>(type);
+    internal::CreateOrEdit(BamRecordTag::SCRAP_ZMW_TYPE, tagValue, &impl_);
+    return *this;
+}
+
+// Creates or overwrites the SCRAP_ZMW_TYPE tag with the given character.
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::ScrapZmwType(const char type)
+{
+    internal::CreateOrEdit(BamRecordTag::SCRAP_ZMW_TYPE, type, &impl_);
+    return *this;
+}
+
+// Forwards to FetchBases() for the SEQ field with the requested
+// orientation / aligned / exciseSoftClips settings.
+string BamRecord::Sequence(const Orientation orientation,
+                           bool aligned,
+                           bool exciseSoftClips) const
+{
+    const BamRecordTag tag = BamRecordTag::SEQ;
+    return FetchBases(tag, orientation, aligned, exciseSoftClips);
+}
+
+// Reads the SNR tag as a float array. No null check is made on the tag here.
+vector<float> BamRecord::SignalToNoise(void) const
+{
+    const auto label = internal::BamRecordTags::LabelFor(BamRecordTag::SNR);
+    return impl_.TagValue(label).ToFloatArray();
+}
+
+// Creates or overwrites the SNR tag with the given values.
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::SignalToNoise(const vector<float>& snr)
+{
+    internal::CreateOrEdit(BamRecordTag::SNR, snr, &impl_);
+    return *this;
+}
+
+// Forwards to FetchUInts() for the START_FRAME tag with the requested
+// orientation / aligned / exciseSoftClips / pulseBehavior settings.
+vector<uint32_t> BamRecord::StartFrame(Orientation orientation,
+                                       bool aligned,
+                                       bool exciseSoftClips,
+                                       PulseBehavior pulseBehavior) const
+{
+    const BamRecordTag tag = BamRecordTag::START_FRAME;
+    return FetchUInts(tag, orientation, aligned, exciseSoftClips, pulseBehavior);
+}
+
+// Creates or overwrites the START_FRAME tag with the given values.
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::StartFrame(const vector<uint32_t>& startFrame)
+{
+    internal::CreateOrEdit(BamRecordTag::START_FRAME, startFrame, &impl_);
+    return *this;
+}
+
+// Forwards to FetchQualities() for the SUBSTITUTION_QV tag with the requested
+// orientation / aligned / exciseSoftClips settings.
+QualityValues BamRecord::SubstitutionQV(Orientation orientation,
+                                        bool aligned,
+                                        bool exciseSoftClips) const
+{
+    const BamRecordTag tag = BamRecordTag::SUBSTITUTION_QV;
+    return FetchQualities(tag, orientation, aligned, exciseSoftClips);
+}
+
+// Creates or overwrites the SUBSTITUTION_QV tag with the Fastq() form of the
+// given qualities. Returns *this to allow call chaining.
+BamRecord& BamRecord::SubstitutionQV(const QualityValues& substitutionQVs)
+{
+    internal::CreateOrEdit(BamRecordTag::SUBSTITUTION_QV, substitutionQVs.Fastq(), &impl_);
+    return *this;
+}
+
+// Forwards to FetchBases() for the SUBSTITUTION_TAG tag with the requested
+// orientation / aligned / exciseSoftClips settings.
+string BamRecord::SubstitutionTag(Orientation orientation,
+                                  bool aligned,
+                                  bool exciseSoftClips) const
+{
+    const BamRecordTag tag = BamRecordTag::SUBSTITUTION_TAG;
+    return FetchBases(tag, orientation, aligned, exciseSoftClips);
+}
+
+// Creates or overwrites the SUBSTITUTION_TAG tag with the given string.
+// Returns *this to allow call chaining.
+BamRecord& BamRecord::SubstitutionTag(const string& tags)
+{
+    internal::CreateOrEdit(BamRecordTag::SUBSTITUTION_TAG, tags, &impl_);
+    return *this;
+}
+
+// Record type derived from the read group's read-type name. If that lookup
+// or conversion throws, the full name is inspected instead: a name containing
+// "ccs" yields CCS, anything else yields UNKNOWN.
+RecordType BamRecord::Type(void) const
+{
+    try {
+        const string& readTypeName = ReadGroup().ReadType();
+        return internal::NameToType(readTypeName);
+    } catch (std::exception&) {
+
+        // read group not found - peek at name to see if we're CCS
+        const bool looksLikeCcs = (FullName().find("ccs") != string::npos);
+        return looksLikeCcs ? RecordType::CCS : RecordType::UNKNOWN;
+    }
+}
+
+// Rebuilds the record name as "<movie>/<hole>/<suffix>", where <hole> is the
+// hole number or "?" when absent, and <suffix> is "ccs" for CCS records or
+// "<queryStart>_<queryEnd>" otherwise (each replaced by "?" when absent).
+void BamRecord::UpdateName()
+{
+    string rebuilt;
+    rebuilt.reserve(100);
+
+    // movie name
+    rebuilt += MovieName();
+    rebuilt += "/";
+
+    // hole number
+    if (!HasHoleNumber())
+        rebuilt += "?";
+    else
+        rebuilt += std::to_string(HoleNumber());
+    rebuilt += "/";
+
+    // "ccs" or query interval
+    if (Type() != RecordType::CCS) {
+        if (!HasQueryStart())
+            rebuilt += "?";
+        else
+            rebuilt += std::to_string(QueryStart());
+
+        rebuilt += '_';
+
+        if (!HasQueryEnd())
+            rebuilt += "?";
+        else
+            rebuilt += std::to_string(QueryEnd());
+    } else {
+        rebuilt += "ccs";
+    }
+
+    impl_.Name(rebuilt);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "pbbam/BamRecordBuilder.h"
+#include "pbbam/BamTagCodec.h"
+#include "AssertUtils.h"
+#include "MemoryUtils.h"
+#include <htslib/sam.h>
+#include <cstring>
+#include <memory>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Default constructor: resets all fields, then pre-reserves capacity for the
+// variable-length members.
+BamRecordBuilder::BamRecordBuilder(void)
+{
+    // start from a clean slate
+    Reset();
+
+    // reserve some room up front for the variable-length data
+    cigar_.reserve(256);
+    name_.reserve(256);
+    qualities_.reserve(2096);
+    sequence_.reserve(2096);
+}
+
+// Constructs a builder bound to the given header: resets all other fields,
+// then pre-reserves capacity for the variable-length members.
+BamRecordBuilder::BamRecordBuilder(const BamHeader& header)
+    : header_(header)
+{
+    // start from a clean slate
+    Reset();
+
+    // reserve some room up front for the variable-length data
+    cigar_.reserve(256);
+    name_.reserve(256);
+    qualities_.reserve(2096);
+    sequence_.reserve(2096);
+}
+
+// Constructs a builder pre-populated from an existing record: the header is
+// taken from the prototype and Reset(prototype) copies the remaining fields.
+BamRecordBuilder::BamRecordBuilder(const BamRecord& prototype)
+ : header_(prototype.Header())
+{
+ Reset(prototype);
+}
+
+// Copy constructor. Copies the header along with the core data and every
+// variable-length field, so that Build() on the copy produces records with
+// the same header as the source builder.
+// NOTE(review): header_ is listed first on the assumption that it is declared
+// before core_ in the class; reorder to match the declaration if needed.
+BamRecordBuilder::BamRecordBuilder(const BamRecordBuilder& other)
+    : header_(other.header_)
+    , core_(other.core_)
+    , name_(other.name_)
+    , sequence_(other.sequence_)
+    , qualities_(other.qualities_)
+    , cigar_(other.cigar_)
+    , tags_(other.tags_)
+{ }
+
+// Move constructor. Moves the header along with the core data and every
+// variable-length field, so that Build() on the new builder produces records
+// with the same header as the source builder had.
+// NOTE(review): header_ is listed first on the assumption that it is declared
+// before core_ in the class; reorder to match the declaration if needed.
+BamRecordBuilder::BamRecordBuilder(BamRecordBuilder&& other)
+    : header_(std::move(other.header_))
+    , core_(std::move(other.core_))
+    , name_(std::move(other.name_))
+    , sequence_(std::move(other.sequence_))
+    , qualities_(std::move(other.qualities_))
+    , cigar_(std::move(other.cigar_))
+    , tags_(std::move(other.tags_))
+{ }
+
+// Copy assignment. Copies the header along with the core data and every
+// variable-length field; previously the header was left untouched, so the
+// assigned-to builder kept building records with its old header.
+BamRecordBuilder& BamRecordBuilder::operator=(const BamRecordBuilder& other)
+{
+    header_ = other.header_;
+    core_ = other.core_;
+    name_ = other.name_;
+    sequence_ = other.sequence_;
+    qualities_ = other.qualities_;
+    cigar_ = other.cigar_;
+    tags_ = other.tags_;
+    return *this;
+}
+
+// Move assignment. Moves the header along with the core data and every
+// variable-length field; previously the header was left untouched, so the
+// assigned-to builder kept building records with its old header.
+BamRecordBuilder& BamRecordBuilder::operator=(BamRecordBuilder&& other)
+{
+    header_ = std::move(other.header_);
+    core_ = std::move(other.core_);
+    name_ = std::move(other.name_);
+    sequence_ = std::move(other.sequence_);
+    qualities_ = std::move(other.qualities_);
+    cigar_ = std::move(other.cigar_);
+    tags_ = std::move(other.tags_);
+    return *this;
+}
+
+// Destructor: nothing to release explicitly.
+BamRecordBuilder::~BamRecordBuilder(void)
+{
+}
+
+// Creates a new record bound to the builder's header and fills it through
+// BuildInPlace(). The success flag of BuildInPlace() is not inspected here.
+BamRecord BamRecordBuilder::Build(void) const
+{
+    BamRecord built(header_);
+    BuildInPlace(built);
+    return built;
+}
+
+// Populates 'record' from the builder's current state: copies the
+// fixed-length core, then writes name, CIGAR, 4-bit packed sequence,
+// qualities and encoded tags, in that order, into the record's
+// variable-length data block (growing the block when needed).
+//
+// Returns false when the record has no usable raw data, when a CIGAR
+// operation code is out of range, or when the data block cannot be grown;
+// returns true on success. Tags and CIGAR are encoded before the record is
+// modified, so a failure there leaves the record untouched.
+//
+// Layout note: the sequence occupies (seqLength + 1) / 2 bytes (two bases per
+// byte), matching what htslib's bam_get_qual()/bam_get_aux() expect.
+bool BamRecordBuilder::BuildInPlace(BamRecord& record) const
+{
+    PBBAM_SHARED_PTR<bam1_t> recordRawData = internal::BamRecordMemory::GetRawData(record);
+    PB_ASSERT_OR_RETURN_VALUE(recordRawData, false);
+    PB_ASSERT_OR_RETURN_VALUE(recordRawData->data, false);
+
+    // encode tags & cigar first - nothing in 'record' has been touched yet
+    const vector<uint8_t> encodedTags = BamTagCodec::Encode(tags_);
+
+    const size_t numCigarOps = cigar_.size();
+    vector<uint32_t> encodedCigar(numCigarOps);
+    for (size_t i = 0; i < numCigarOps; ++i) {
+        const CigarOperation& op = cigar_.at(i);
+        const uint8_t type = static_cast<uint8_t>(op.Type());
+        // valid op codes run from BAM_CMATCH (0) through BAM_CDIFF (8)
+        PB_ASSERT_OR_RETURN_VALUE(type <= BAM_CDIFF, false);
+        encodedCigar[i] = op.Length() << BAM_CIGAR_SHIFT;
+        encodedCigar[i] |= type;
+    }
+
+    // setup variable length data
+    const size_t nameLength      = name_.size() + 1;
+    const size_t cigarLength     = numCigarOps * sizeof(uint32_t);
+    const size_t seqLength       = sequence_.size();
+    const size_t packedSeqLength = (seqLength + 1) / 2;   // two bases per byte
+    const size_t qualLength      = seqLength;
+    const size_t tagLength       = encodedTags.size();
+    const size_t dataLength      = nameLength + cigarLength + packedSeqLength + qualLength + tagLength;
+
+    // realloc if necessary
+    uint8_t* varLengthDataBlock = recordRawData->data;
+    size_t allocatedDataLength = recordRawData->m_data;
+    if (allocatedDataLength < dataLength) {
+        allocatedDataLength = dataLength;
+        kroundup32(allocatedDataLength);
+        uint8_t* grown = static_cast<uint8_t*>(realloc(varLengthDataBlock, allocatedDataLength));
+        if (grown == nullptr)
+            return false;   // the record still owns its original block
+        varLengthDataBlock = grown;
+    }
+    recordRawData->core   = core_;
+    recordRawData->data   = varLengthDataBlock;
+    recordRawData->l_data = dataLength;
+    recordRawData->m_data = allocatedDataLength;
+
+    size_t index = 0;
+
+    // name (including NULL-term)
+    memcpy(&varLengthDataBlock[index], name_.c_str(), nameLength);
+    index += nameLength;
+
+    // cigar
+    if (cigarLength > 0) {
+        memcpy(&varLengthDataBlock[index], &encodedCigar[0], cigarLength);
+        index += cigarLength;
+
+        // update bin after we've calculated cigar info;
+        // bam_cigar2rlen() yields a length, so the end is pos + length
+        const int32_t refLength = bam_cigar2rlen(static_cast<int>(numCigarOps), &encodedCigar[0]);
+        recordRawData->core.bin = hts_reg2bin(core_.pos, core_.pos + refLength, 14, 5);
+    }
+
+    // seq & qual
+    if (seqLength > 0) {
+
+        // bases are OR-ed in nibble by nibble, so the target must start zeroed
+        uint8_t* s = &varLengthDataBlock[index];
+        memset(s, 0, packedSeqLength);
+        for (size_t i = 0; i < seqLength; ++i)
+            s[i>>1] |= ( seq_nt16_table[static_cast<int>(sequence_.at(i))] << ((~i&1)<<2) );
+        index += packedSeqLength;
+
+        // qualities: FASTQ chars converted to raw values, or 0xFF when absent
+        uint8_t* q = &varLengthDataBlock[index];
+        if (qualities_.empty())
+            memset(q, 0xFF, seqLength);
+        else {
+            for (size_t i = 0; i < seqLength; ++i)
+                q[i] = qualities_.at(i) - 33;
+        }
+        index += qualLength;
+    }
+
+    // tags
+    if (tagLength > 0) {
+        memcpy(&varLengthDataBlock[index], &encodedTags[0], tagLength);
+        index += tagLength;
+    }
+
+    // sanity check
+    PB_ASSERT_OR_RETURN_VALUE(index == dataLength, false);
+    return true;
+}
+
+// Stores a copy of the CIGAR and records its operation count in the core
+// data. Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::Cigar(const PacBio::BAM::Cigar& cigar)
+{
+    cigar_ = cigar;
+    core_.n_cigar = cigar.size();
+    return *this;
+}
+
+// Records the CIGAR's operation count in the core data, then takes ownership
+// of the CIGAR by move. Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::Cigar(PacBio::BAM::Cigar&& cigar)
+{
+    // size must be read before the move below
+    const auto numOps = cigar.size();
+    core_.n_cigar = numOps;
+    cigar_ = std::move(cigar);
+    return *this;
+}
+
+// Stores a copy of the name and records its length, including the
+// NULL terminator, in the core data. Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::Name(const std::string& name)
+{
+    name_ = name;
+    core_.l_qname = name.size() + 1;   // (NULL-term)
+    return *this;
+}
+
+// Records the name's length, including the NULL terminator, in the core data,
+// then takes ownership of the name by move.
+// Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::Name(std::string&& name)
+{
+    // size must be read before the move below
+    const auto lengthWithTerminator = name.size() + 1;   // (NULL-term)
+    core_.l_qname = lengthWithTerminator;
+    name_ = std::move(name);
+    return *this;
+}
+
+// Clears the builder: zeroes the fixed-length core (with l_qname set to 1 for
+// the name's NULL terminator) and empties every variable-length member.
+// The stored header is not modified.
+void BamRecordBuilder::Reset(void)
+{
+    // variable-length data
+    tags_.clear();
+    cigar_.clear();
+    qualities_.clear();
+    sequence_.clear();
+    name_.clear();
+
+    // fixed-length data
+    memset(&core_, 0, sizeof(bam1_core_t));
+    core_.l_qname = 1;   // always has a NULL-term
+}
+
+// Re-initializes the builder from an existing record: clears everything,
+// adopts the prototype's header, then copies its core data, name, sequence,
+// qualities (Fastq form), CIGAR and tags. If the prototype has no raw data,
+// only the clear and header copy take effect.
+void BamRecordBuilder::Reset(const BamRecord& prototype)
+{
+    // ensure clean slate
+    Reset();
+    header_ = prototype.Header();
+
+    // fixed-length core data
+    const PBBAM_SHARED_PTR<bam1_t> protoRaw = internal::BamRecordMemory::GetRawData(prototype);
+    PB_ASSERT_OR_RETURN(protoRaw);
+    core_ = protoRaw->core;
+
+    // variable-length data
+    const BamRecordImpl& protoImpl = internal::BamRecordMemory::GetImpl(prototype);
+    name_      = protoImpl.Name();
+    sequence_  = protoImpl.Sequence();
+    qualities_ = protoImpl.Qualities().Fastq();
+    cigar_     = protoImpl.CigarData();
+    tags_      = protoImpl.Tags();
+}
+
+// Rvalue overload: re-initializes the builder from an existing record. Clears
+// everything, adopts the prototype's header, then takes its core data, name,
+// sequence, qualities (Fastq form), CIGAR and tags. If the prototype has no
+// raw data, only the clear and header copy take effect.
+void BamRecordBuilder::Reset(BamRecord&& prototype)
+{
+    // ensure clean slate
+    Reset();
+    header_ = prototype.Header();
+
+    // fixed-length core data
+    const PBBAM_SHARED_PTR<bam1_t> protoRaw = internal::BamRecordMemory::GetRawData(prototype);
+    PB_ASSERT_OR_RETURN(protoRaw);
+    core_ = std::move(protoRaw->core);
+
+    // variable-length data
+    const BamRecordImpl& protoImpl = internal::BamRecordMemory::GetImpl(prototype);
+    name_      = protoImpl.Name();
+    sequence_  = protoImpl.Sequence();
+    qualities_ = protoImpl.Qualities().Fastq();
+    cigar_     = protoImpl.CigarData();
+    tags_      = protoImpl.Tags();
+}
+
+// Stores a copy of the sequence and records its length in the core data.
+// Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::Sequence(const std::string& sequence)
+{
+    sequence_ = sequence;
+    core_.l_qseq = sequence.size();
+    return *this;
+}
+
+// Records the sequence length in the core data, then takes ownership of the
+// sequence by move. Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::Sequence(std::string&& sequence)
+{
+    // size must be read before the move below
+    const auto numBases = sequence.size();
+    core_.l_qseq = numBases;
+    sequence_ = std::move(sequence);
+    return *this;
+}
+
+// Sets (ok == true) or clears (ok == false) the DUPLICATE flag bit.
+// Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::SetDuplicate(bool ok)
+{
+    if (!ok) {
+        core_.flag &= ~BamRecordImpl::DUPLICATE;
+    } else {
+        core_.flag |= BamRecordImpl::DUPLICATE;
+    }
+    return *this;
+}
+
+// Sets (ok == true) or clears (ok == false) the FAILED_QC flag bit.
+// Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::SetFailedQC(bool ok)
+{
+    if (!ok) {
+        core_.flag &= ~BamRecordImpl::FAILED_QC;
+    } else {
+        core_.flag |= BamRecordImpl::FAILED_QC;
+    }
+    return *this;
+}
+
+// Sets (ok == true) or clears (ok == false) the MATE_1 flag bit.
+// Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::SetFirstMate(bool ok)
+{
+    if (!ok) {
+        core_.flag &= ~BamRecordImpl::MATE_1;
+    } else {
+        core_.flag |= BamRecordImpl::MATE_1;
+    }
+    return *this;
+}
+
+// Inverse-sense flag: ok == true clears the UNMAPPED bit, ok == false sets
+// it. Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::SetMapped(bool ok)
+{
+    if (!ok) {
+        core_.flag |= BamRecordImpl::UNMAPPED;
+    } else {
+        core_.flag &= ~BamRecordImpl::UNMAPPED;
+    }
+    return *this;
+}
+
+// Inverse-sense flag: ok == true clears the MATE_UNMAPPED bit, ok == false
+// sets it. Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::SetMateMapped(bool ok)
+{
+    if (!ok) {
+        core_.flag |= BamRecordImpl::MATE_UNMAPPED;
+    } else {
+        core_.flag &= ~BamRecordImpl::MATE_UNMAPPED;
+    }
+    return *this;
+}
+
+// Sets (ok == true) or clears (ok == false) the MATE_REVERSE_STRAND flag bit.
+// Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::SetMateReverseStrand(bool ok)
+{
+    if (!ok) {
+        core_.flag &= ~BamRecordImpl::MATE_REVERSE_STRAND;
+    } else {
+        core_.flag |= BamRecordImpl::MATE_REVERSE_STRAND;
+    }
+    return *this;
+}
+
+// Sets (ok == true) or clears (ok == false) the PAIRED flag bit.
+// Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::SetPaired(bool ok)
+{
+    if (!ok) {
+        core_.flag &= ~BamRecordImpl::PAIRED;
+    } else {
+        core_.flag |= BamRecordImpl::PAIRED;
+    }
+    return *this;
+}
+
+// Inverse-sense flag: ok == true clears the SECONDARY bit, ok == false sets
+// it. Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::SetPrimaryAlignment(bool ok)
+{
+    if (!ok) {
+        core_.flag |= BamRecordImpl::SECONDARY;
+    } else {
+        core_.flag &= ~BamRecordImpl::SECONDARY;
+    }
+    return *this;
+}
+
+// Sets (ok == true) or clears (ok == false) the PROPER_PAIR flag bit.
+// Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::SetProperPair(bool ok)
+{
+    if (!ok) {
+        core_.flag &= ~BamRecordImpl::PROPER_PAIR;
+    } else {
+        core_.flag |= BamRecordImpl::PROPER_PAIR;
+    }
+    return *this;
+}
+
+// Sets (ok == true) or clears (ok == false) the REVERSE_STRAND flag bit.
+// Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::SetReverseStrand(bool ok)
+{
+    if (!ok) {
+        core_.flag &= ~BamRecordImpl::REVERSE_STRAND;
+    } else {
+        core_.flag |= BamRecordImpl::REVERSE_STRAND;
+    }
+    return *this;
+}
+
+// Sets (ok == true) or clears (ok == false) the MATE_2 flag bit.
+// Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::SetSecondMate(bool ok)
+{
+    if (!ok) {
+        core_.flag &= ~BamRecordImpl::MATE_2;
+    } else {
+        core_.flag |= BamRecordImpl::MATE_2;
+    }
+    return *this;
+}
+
+// Sets (ok == true) or clears (ok == false) the SUPPLEMENTARY flag bit.
+// Returns *this to allow call chaining.
+BamRecordBuilder& BamRecordBuilder::SetSupplementaryAlignment(bool ok)
+{
+    if (!ok) {
+        core_.flag &= ~BamRecordImpl::SUPPLEMENTARY;
+    } else {
+        core_.flag |= BamRecordImpl::SUPPLEMENTARY;
+    }
+    return *this;
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "pbbam/BamRecordImpl.h"
+#include "pbbam/BamTagCodec.h"
+#include "AssertUtils.h"
+#include "BamRecordTags.h"
+#include "MemoryUtils.h"
+#include <algorithm>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <new>
+#include <utility>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Default constructor: starts with no record attached, then lets
+// InitializeData() create and set up the underlying htslib record.
+BamRecordImpl::BamRecordImpl(void)
+    : d_(nullptr)
+{
+    InitializeData();
+}
+
+// Copy constructor: duplicates the other record's htslib data with bam_dup1()
+// and copies its cached tag offsets.
+BamRecordImpl::BamRecordImpl(const BamRecordImpl& other)
+    : d_(bam_dup1(other.d_.get()), internal::HtslibRecordDeleter())
+    , tagOffsets_(other.tagOffsets_)
+{
+}
+
+// Move constructor: takes over the other record's htslib data and cached tag
+// offsets. The source is left holding no record.
+BamRecordImpl::BamRecordImpl(BamRecordImpl&& other)
+    : d_(std::move(other.d_))
+    , tagOffsets_(std::move(other.tagOffsets_))
+{
+    // make the source's empty state explicit
+    other.d_.reset();
+}
+
+// Copy assignment: copies the other record's htslib data (bam_copy1) and its
+// cached tag offsets into this object. Self-assignment is a no-op.
+BamRecordImpl& BamRecordImpl::operator=(const BamRecordImpl& other)
+{
+    if (this == &other)
+        return *this;
+
+    // this object may hold no record (e.g. after being moved from),
+    // so make sure there is a destination to copy into
+    if (d_ == nullptr)
+        InitializeData();
+
+    bam_copy1(d_.get(), other.d_.get());
+    tagOffsets_ = other.tagOffsets_;
+    return *this;
+}
+
+// Move assignment: takes over the other record's htslib data and cached tag
+// offsets, releasing whatever this object held before. The source is left
+// holding no record. Self-assignment is a no-op.
+BamRecordImpl& BamRecordImpl::operator=(BamRecordImpl&& other)
+{
+    if (this == &other)
+        return *this;
+
+    d_ = std::move(other.d_);
+    other.d_.reset();
+    tagOffsets_ = std::move(other.tagOffsets_);
+    return *this;
+}
+
+// Destructor: no explicit cleanup; members release themselves.
+BamRecordImpl::~BamRecordImpl(void)
+{
+}
+
+// Adds a new tag, with no additional modifier.
+// Forwards to the three-argument overload and returns its result.
+bool BamRecordImpl::AddTag(const string& tagName,
+                           const Tag &value)
+{
+    const TagModifier noModifier = TagModifier::NONE;
+    return AddTag(tagName, value, noModifier);
+}
+
+// Adds a new tag identified by enum value, with no additional modifier.
+// The enum is translated to its label and forwarded to the string overload.
+bool BamRecordImpl::AddTag(const BamRecordTag tag,
+                           const Tag& value)
+{
+    const string label = internal::BamRecordTags::LabelFor(tag);
+    return AddTag(label, value, TagModifier::NONE);
+}
+
+// Adds a new tag to the record.
+//
+// Returns false (leaving the record untouched) if the name is not exactly
+// 2 characters or the tag is already present, and false if the value could
+// not be appended. The cached tag offsets are refreshed only after a
+// successful append.
+bool BamRecordImpl::AddTag(const string& tagName,
+                           const Tag& value,
+                           const TagModifier additionalModifier)
+{
+    const bool validName = (tagName.size() == 2);
+    if (!validName || HasTag(tagName))
+        return false;
+
+    if (!AddTagImpl(tagName, value, additionalModifier))
+        return false;
+
+    UpdateTagMap();
+    return true;
+}
+
+// Adds a new tag identified by enum value, with an additional modifier.
+// The enum is translated to its label and forwarded to the string overload.
+bool BamRecordImpl::AddTag(const BamRecordTag tag,
+                           const Tag& value,
+                           const TagModifier additionalModifier)
+{
+    const string label = internal::BamRecordTags::LabelFor(tag);
+    return AddTag(label, value, additionalModifier);
+}
+
+// Encodes the value and appends it to the record's tag data via
+// bam_aux_append(). Does NOT refresh the cached tag offsets; callers do that.
+//
+// Returns false if the value encodes to no bytes, true otherwise.
+bool BamRecordImpl::AddTagImpl(const string& tagName,
+                               const Tag& value,
+                               const TagModifier additionalModifier)
+{
+    const vector<uint8_t> encoded = BamTagCodec::ToRawData(value, additionalModifier);
+    if (encoded.empty())
+        return false;
+
+    // htslib takes a non-const pointer here, hence the const_cast
+    uint8_t* payload = const_cast<uint8_t*>(encoded.data());
+    const auto typeCode = BamTagCodec::TagTypeCode(value, additionalModifier);
+    bam_aux_append(d_.get(), tagName.c_str(), typeCode, encoded.size(), payload);
+    return true;
+}
+
+// Returns the record's CIGAR, decoded from the packed 32-bit operations
+// stored in the htslib record.
+Cigar BamRecordImpl::CigarData(void) const
+{
+    Cigar result;
+    result.reserve(d_->core.n_cigar);
+
+    const uint32_t* packedOps = bam_get_cigar(d_);
+    uint32_t opIndex = 0;
+    while (opIndex < d_->core.n_cigar) {
+        const uint32_t packed = packedOps[opIndex];
+        const uint32_t length = bam_cigar_oplen(packed);
+        const CigarOperationType type = static_cast<CigarOperationType>(bam_cigar_op(packed));
+        result.push_back(CigarOperation(type, length));
+        ++opIndex;
+    }
+    return result;
+}
+
+// Replaces the record's CIGAR with the provided one.
+//
+// The record's data block is resized by the difference in CIGAR size, the
+// data stored after the CIGAR (seq, qual, tags) is shifted to its new
+// position, and the new operations are written in place.
+BamRecordImpl& BamRecordImpl::CigarData(const Cigar& cigar)
+{
+    // each CIGAR op occupies one uint32_t
+    // byteDelta: pos -> growing, neg -> shrinking
+    const size_t newOpCount = cigar.size();
+    const int opDelta = newOpCount - d_->core.n_cigar;
+    const int byteDelta = opDelta * sizeof(uint32_t);
+    const int previousDataLength = d_->l_data;
+    d_->l_data += byteDelta;
+    MaybeReallocData();
+
+    // the source address must be taken while n_cigar still holds the old
+    // count, the destination address after it has been updated
+    const uint8_t* sequenceBefore = bam_get_seq(d_);
+    const size_t tailLength = previousDataLength - (sequenceBefore - d_->data);
+    d_->core.n_cigar = newOpCount;
+    uint8_t* sequenceAfter = bam_get_seq(d_);
+    memmove(sequenceAfter, sequenceBefore, tailLength);
+
+    // write the new packed operations
+    uint32_t* packedOp = bam_get_cigar(d_);
+    for (const CigarOperation& op : cigar) {
+        *packedOp = bam_cigar_gen(op.Length(), static_cast<int>(op.Type()));
+        ++packedOp;
+    }
+
+    return *this;
+}
+
+// Replaces the record's CIGAR with the one parsed from the given string.
+BamRecordImpl& BamRecordImpl::CigarData(const std::string& cigarString)
+{
+    const Cigar parsed = Cigar::FromStdString(cigarString);
+    return CigarData(parsed);
+}
+
+// Replaces the value of an existing tag, with no additional modifier.
+// Forwards to the three-argument overload and returns its result.
+bool BamRecordImpl::EditTag(const string& tagName,
+                            const Tag& newValue)
+{
+    const TagModifier noModifier = TagModifier::NONE;
+    return EditTag(tagName, newValue, noModifier);
+}
+
+// Replaces the value of an existing tag identified by enum value, with no
+// additional modifier. The enum is translated to its label and forwarded to
+// the string overload.
+bool BamRecordImpl::EditTag(const BamRecordTag tag,
+                            const Tag& newValue)
+{
+    const string label = internal::BamRecordTags::LabelFor(tag);
+    return EditTag(label, newValue, TagModifier::NONE);
+}
+
+// Replaces the value of an existing tag: the old value is removed and the new
+// one appended.
+//
+// Returns false if the tag could not be removed (e.g. it is not present), or
+// if the new value could not be added. In the latter case the old value has
+// already been removed from the record.
+bool BamRecordImpl::EditTag(const string& tagName,
+                            const Tag& newValue,
+                            const TagModifier additionalModifier)
+{
+    // try remove old value (with delayed tag map update)
+    if (!RemoveTagImpl(tagName))
+        return false;
+
+    // if old value removed, add new value
+    const bool added = AddTagImpl(tagName, newValue, additionalModifier);
+
+    // The tag data changed as soon as the removal succeeded, so the cached
+    // offsets must be rebuilt even when the add fails; otherwise later
+    // lookups would use stale offsets.
+    UpdateTagMap();
+    return added;
+}
+
+// Replaces the value of an existing tag identified by enum value, with an
+// additional modifier. The enum is translated to its label and forwarded to
+// the string overload.
+bool BamRecordImpl::EditTag(const BamRecordTag tag,
+                            const Tag& newValue,
+                            const TagModifier additionalModifier)
+{
+    const string label = internal::BamRecordTags::LabelFor(tag);
+    return EditTag(label, newValue, additionalModifier);
+}
+
+// Builds a new record holding a copy (bam_copy1) of the given htslib record.
+BamRecordImpl BamRecordImpl::FromRawData(const PBBAM_SHARED_PTR<bam1_t>& rawData)
+{
+    BamRecordImpl record;
+    const bam1_t* source = rawData.get();
+    bam_copy1(record.d_.get(), source);
+    return record;
+}
+
+// Returns true if the record contains the named tag. Names that are not
+// exactly 2 characters long are never found.
+bool BamRecordImpl::HasTag(const string& tagName) const
+{
+    // the size test must come first: TagOffset() throws on invalid names
+    // (27635: lookup goes through the cached offsets, not bam_aux_get())
+    return tagName.size() == 2 && TagOffset(tagName) != -1;
+}
+
+// Returns true if the record contains the tag identified by enum value.
+bool BamRecordImpl::HasTag(const BamRecordTag tag) const
+{
+    const string label = internal::BamRecordTags::LabelFor(tag);
+    return HasTag(label);
+}
+
+// Creates a fresh htslib record for this object and puts it in its initial
+// state: unmapped, no reference/mate reference, map quality 255, and an empty
+// (NULL-terminated) name.
+//
+// Throws std::bad_alloc if the record or its data buffer cannot be allocated.
+void BamRecordImpl::InitializeData(void)
+{
+    // initial capacity of the data buffer, in bytes
+    // (maybe make this value tune-able later?)
+    const int initialDataCapacity = 0x800;
+
+    d_.reset(bam_init1(), internal::HtslibRecordDeleter());
+    if (d_ == nullptr)
+        throw std::bad_alloc();
+
+    d_->data = static_cast<uint8_t*>(calloc(initialDataCapacity, sizeof(uint8_t)));
+    if (d_->data == nullptr)
+        throw std::bad_alloc();
+
+    // init unmapped
+    Position(PacBio::BAM::UnmappedPosition);
+    MatePosition(PacBio::BAM::UnmappedPosition);
+    ReferenceId(-1);
+    MateReferenceId(-1);
+    SetMapped(false);
+    MapQuality(255);
+
+    // initialized with NULL term for qname
+    d_->core.l_qname = 1;
+    d_->l_data = 1;
+    d_->m_data = initialDataCapacity;
+}
+
+// Grows the record's data buffer if the (already updated) l_data no longer
+// fits in the current capacity m_data. The new capacity is l_data rounded up
+// with kroundup32.
+//
+// Throws std::bad_alloc if the buffer cannot be grown; in that case the
+// existing buffer and capacity are left as they were.
+void BamRecordImpl::MaybeReallocData(void)
+{
+    // about to grow data contents to l_data size, but m_data is our current max.
+    if (d_->m_data >= d_->l_data)
+        return;
+
+    decltype(d_->m_data) newCapacity = d_->l_data;
+    kroundup32(newCapacity);
+
+    // keep the old pointer until realloc has succeeded, so a failure
+    // neither leaks the buffer nor leaves data pointing at NULL
+    void* grown = realloc(d_->data, newCapacity);
+    if (grown == nullptr)
+        throw std::bad_alloc();
+
+    d_->data = static_cast<uint8_t*>(grown);
+    d_->m_data = newCapacity;
+}
+
+// Returns a copy of the record's (NULL-terminated) name.
+string BamRecordImpl::Name(void) const
+{
+    const char* qname = bam_get_qname(d_);
+    return string(qname);
+}
+
+// Replaces the record's name.
+//
+// The record's data block is resized by the difference in name length, the
+// data stored after the name (cigar, seq, qual, tags) is shifted to its new
+// position, and the new name (including NULL terminator) is written at the
+// start of the data block.
+BamRecordImpl& BamRecordImpl::Name(const std::string& name)
+{
+    // byteDelta: pos -> growing, neg -> shrinking
+    const size_t newNameLength = name.size() + 1; // +1 for NULL-term
+    const int byteDelta = newNameLength - d_->core.l_qname;
+    const int previousDataLength = d_->l_data;
+    d_->l_data += byteDelta;
+    MaybeReallocData();
+
+    // the source address must be taken while l_qname still holds the old
+    // length, the destination address after it has been updated
+    const uint32_t* cigarBefore = bam_get_cigar(d_);
+    const uint8_t* cigarBeforeBytes = reinterpret_cast<const uint8_t*>(cigarBefore);
+    const size_t tailLength = previousDataLength - (cigarBeforeBytes - d_->data);
+    d_->core.l_qname = newNameLength;
+    uint32_t* cigarAfter = bam_get_cigar(d_);
+    memmove(cigarAfter, cigarBefore, tailLength);
+
+    // fill in new name
+    memcpy(d_->data, name.c_str(), newNameLength);
+    return *this;
+}
+
+// Returns the record's per-base quality values.
+//
+// The result is empty if the record has no sequence, or if the first stored
+// quality byte is 0xff.
+QualityValues BamRecordImpl::Qualities(void) const
+{
+    const size_t numQuals = d_->core.l_qseq;
+    if (numQuals == 0)
+        return QualityValues();
+
+    const uint8_t* stored = bam_get_qual(d_);
+    if (*stored == 0xff)
+        return QualityValues();
+
+    QualityValues result;
+    result.reserve(numQuals);
+    const uint8_t* const storedEnd = stored + numQuals;
+    for (const uint8_t* q = stored; q != storedEnd; ++q)
+        result.push_back(QualityValue(*q));
+    return result;
+}
+
+// Removes the named tag from the record. Returns true if a tag was removed;
+// the cached tag offsets are refreshed only in that case.
+bool BamRecordImpl::RemoveTag(const string& tagName)
+{
+    if (!RemoveTagImpl(tagName))
+        return false;
+
+    UpdateTagMap();
+    return true;
+}
+
+// Removes the tag identified by enum value. Returns true if a tag was removed.
+bool BamRecordImpl::RemoveTag(const BamRecordTag tag)
+{
+    const string label = internal::BamRecordTags::LabelFor(tag);
+    return RemoveTag(label);
+}
+
+// Removes the named tag from the record's tag data via htslib. Does NOT
+// refresh the cached tag offsets; callers do that.
+//
+// Returns false if the name is not exactly 2 characters, the tag is not
+// found, or bam_aux_del() reports a non-zero result.
+bool BamRecordImpl::RemoveTagImpl(const string &tagName)
+{
+    if (tagName.size() != 2)
+        return false;
+
+    uint8_t* existing = bam_aux_get(d_.get(), tagName.c_str());
+    if (existing == 0)
+        return false;
+
+    return bam_aux_del(d_.get(), existing) == 0;
+}
+
+// Returns the record's sequence as text, decoding each stored 4-bit base code
+// through a 16-character lookup table.
+string BamRecordImpl::Sequence(void) const
+{
+    // indexed by the 4-bit code returned by bam_seqi()
+    static const char baseForCode[] = "=ACMGRSVTWYHKDBN";
+
+    string bases;
+    bases.reserve(d_->core.l_qseq);
+    const uint8_t* packed = bam_get_seq(d_);
+    for (int pos = 0; pos < d_->core.l_qseq; ++pos)
+        bases.push_back(baseForCode[bam_seqi(packed, pos)]);
+    return bases;
+}
+
+// Returns the sequence length stored in the record (core.l_qseq).
+size_t BamRecordImpl::SequenceLength(void) const
+{
+    return d_->core.l_qseq;
+}
+
+// Sets the record's sequence and qualities from (unencoded) strings.
+//
+// Non-empty qualities must have the same length as the sequence; this is
+// checked with PB_ASSERT_OR_RETURN_VALUE.
+BamRecordImpl& BamRecordImpl::SetSequenceAndQualities(const std::string& sequence,
+                                                      const std::string& qualities)
+{
+    // TODO: I'm ok with the assert for now, but how to handle at runtime?
+    if (!qualities.empty()) {
+        PB_ASSERT_OR_RETURN_VALUE(sequence.size() == qualities.size(), *this);
+    }
+
+    const char* rawSequence = sequence.c_str();
+    const char* rawQualities = qualities.c_str();
+    const bool isPreencoded = false;
+    return SetSequenceAndQualitiesInternal(rawSequence,
+                                           sequence.size(),
+                                           rawQualities,
+                                           isPreencoded);
+}
+
+// Sets the record's sequence and qualities from (unencoded) character data.
+// Forwards to SetSequenceAndQualitiesInternal() with isPreencoded == false.
+BamRecordImpl& BamRecordImpl::SetSequenceAndQualities(const char* sequence,
+                                                      const size_t sequenceLength,
+                                                      const char* qualities)
+{
+    const bool isPreencoded = false;
+    return SetSequenceAndQualitiesInternal(sequence, sequenceLength, qualities, isPreencoded);
+}
+
+// Sets the record's sequence from already-encoded sequence data, plus
+// qualities. rawSequenceLength is the length of the unencoded sequence.
+// Forwards to SetSequenceAndQualitiesInternal() with isPreencoded == true.
+BamRecordImpl& BamRecordImpl::SetPreencodedSequenceAndQualities(const char* encodedSequence,
+                                                                const size_t rawSequenceLength,
+                                                                const char* qualities)
+{
+    const bool isPreencoded = true;
+    return SetSequenceAndQualitiesInternal(encodedSequence, rawSequenceLength, qualities, isPreencoded);
+}
+
+// Replaces the record's sequence and quality data.
+//
+// sequence       - either plain characters (isPreencoded == false) or data
+//                  that is already packed 2 bases per byte (isPreencoded == true)
+// sequenceLength - number of bases (unencoded length)
+// qualities      - FASTQ-style ASCII qualities (value + 33); if NULL or an
+//                  empty string, all stored qualities are set to 0xff.
+//                  Otherwise sequenceLength characters are read from it.
+//
+// The record's data block is resized, the tag data stored after seq/qual is
+// shifted to its new position, and the new seq/qual bytes are written.
+BamRecordImpl& BamRecordImpl::SetSequenceAndQualitiesInternal(const char* sequence,
+                                                              const size_t sequenceLength,
+                                                              const char* qualities,
+                                                              bool isPreencoded)
+{
+    // determine change in memory needed
+    // diffNumBytes: pos -> growing, neg -> shrinking
+    const int encodedSequenceLength = static_cast<int>((sequenceLength+1)/2);
+    const int oldSeqAndQualLength = static_cast<int>((d_->core.l_qseq+1)/2) + d_->core.l_qseq; // encoded seq + qual
+    const int newSeqAndQualLength = encodedSequenceLength + sequenceLength; // encoded seq + qual
+    const int diffNumBytes = newSeqAndQualLength - oldSeqAndQualLength;
+    const int oldLengthData = d_->l_data;
+    d_->l_data += diffNumBytes;
+    MaybeReallocData();
+
+    // shift trailing data (tags) as needed; the source address must be taken
+    // while l_qseq still holds the old length, the destination after the update
+    const uint8_t* oldTagStart = bam_get_aux(d_);
+    const size_t trailingDataLength = oldLengthData - (oldTagStart - d_->data);
+    d_->core.l_qseq = sequenceLength;
+    uint8_t* newTagStart = bam_get_aux(d_);
+    memmove(newTagStart, oldTagStart, trailingDataLength);
+
+    // fill in new sequence
+    uint8_t* pEncodedSequence = bam_get_seq(d_);
+    if (isPreencoded) {
+        memcpy(pEncodedSequence, sequence, encodedSequenceLength);
+    } else {
+        memset(pEncodedSequence, 0, encodedSequenceLength);
+        for (size_t i = 0; i < sequenceLength; ++i) {
+            // index through unsigned char: a plain (possibly signed) char
+            // with its high bit set would otherwise yield a negative index
+            const unsigned char base = static_cast<unsigned char>(sequence[i]);
+            pEncodedSequence[i>>1] |= seq_nt16_table[base] << ((~i&1)<<2);
+        }
+    }
+
+    // fill in quality values
+    uint8_t* encodedQualities = bam_get_qual(d_);
+    const bool noQualities = (qualities == nullptr) || (qualities[0] == '\0');
+    if (noQualities)
+        memset(encodedQualities, 0xff, sequenceLength);
+    else {
+        for (size_t i = 0; i < sequenceLength; ++i)
+            encodedQualities[i] = qualities[i] - 33; // FASTQ ASCII -> int conversion
+    }
+    return *this;
+}
+
+// Returns the cached offset for the named tag, or -1 if no offset is cached
+// for that name. The cache is built on first use (whenever it is empty).
+//
+// Throws std::runtime_error if the name is not exactly 2 characters.
+int BamRecordImpl::TagOffset(const string& tagName) const
+{
+    if (tagName.size() != 2)
+        throw std::runtime_error("invalid tag name size");
+
+    if (tagOffsets_.empty())
+        UpdateTagMap();
+
+    // pack the two name characters into a 16-bit lookup key
+    const uint8_t firstChar = static_cast<uint8_t>(tagName[0]);
+    const uint8_t secondChar = static_cast<uint8_t>(tagName[1]);
+    const uint16_t tagCode = (firstChar << 8) | secondChar;
+
+    const auto found = tagOffsets_.find(tagCode);
+    if (found == tagOffsets_.cend())
+        return -1;
+    return found->second;
+}
+
+// Replaces all of the record's tag data with the encoded form of the given
+// collection, resizing the data block as needed, then rebuilds the cached
+// tag offsets.
+BamRecordImpl& BamRecordImpl::Tags(const TagCollection& tags)
+{
+    // convert tags to binary
+    const vector<uint8_t> encoded = BamTagCodec::Encode(tags);
+    const size_t numBytes = encoded.size();
+
+    // resize the record by the difference between new and old tag data
+    uint8_t* auxStart = bam_get_aux(d_);
+    const size_t oldNumBytes = d_->l_data - (auxStart - d_->data);
+    const int diffNumBytes = numBytes - oldNumBytes;
+    d_->l_data += diffNumBytes;
+    MaybeReallocData();
+
+    // the buffer may have moved, so look up the tag start again
+    auxStart = bam_get_aux(d_);
+    memcpy(auxStart, encoded.data(), numBytes);
+
+    UpdateTagMap();
+    return *this;
+}
+
+// Returns the record's tags, decoded from a copy of its raw tag data.
+TagCollection BamRecordImpl::Tags(void) const
+{
+    const uint8_t* auxStart = bam_get_aux(d_);
+    const size_t auxLength = d_->l_data - (auxStart - d_->data);
+    const vector<uint8_t> rawTags(auxStart, auxStart + auxLength);
+    return BamTagCodec::Decode(rawTags);
+}
+
+// Returns the value of the named tag, or a null Tag if the name is not
+// exactly 2 characters, the tag is not present, or its cached offset lies
+// outside the record's tag data.
+Tag BamRecordImpl::TagValue(const string& tagName) const
+{
+    if (tagName.size() != 2)
+        return Tag();
+
+    const int offset = TagOffset(tagName);
+    if (offset == -1)
+        return Tag();
+
+    bam1_t* b = d_.get();
+    uint8_t* auxStart = bam_get_aux(b);
+    assert(auxStart);
+
+    // cached offsets are relative to the start of the tag data, so they must
+    // be checked against the tag data's length, not the whole record's l_data
+    const ptrdiff_t auxLength = b->l_data - (auxStart - b->data);
+    if (offset >= auxLength)
+        return Tag();
+
+    // the offset already points past the tag name, at the type code
+    return BamTagCodec::FromRawData(auxStart + offset);
+}
+
+// Returns the value of the tag identified by enum value.
+Tag BamRecordImpl::TagValue(const BamRecordTag tag) const
+{
+    const string label = internal::BamRecordTags::LabelFor(tag);
+    return TagValue(label);
+}
+
+// Rebuilds the cached lookup from tag name to offset within the record's tag
+// data.
+//
+// All existing entries are first reset to -1, then the tag data is walked
+// front to back. For each tag the stored offset is the position just past
+// the 2-character name (i.e. at the type code). Scanning stops (via
+// PB_ASSERT_OR_RETURN) at an unknown type or array element type.
+void BamRecordImpl::UpdateTagMap(void) const
+{
+    // clear out offsets, leave map structure basically intact
+    for (auto& entry : tagOffsets_)
+        entry.second = -1;
+
+    const uint8_t* tagStart = bam_get_aux(d_);
+    if (tagStart == 0)
+        return;
+    const ptrdiff_t numBytes = d_->l_data - (tagStart - d_->data);
+
+    // NOTE: using a 16-bit 'code' for tag name here instead of string, to avoid
+    // a lot of string constructions & comparisons. All valid tags will be 2 chars
+    // anyway, so this should be a nice lookup mechanism.
+    //
+    int64_t i = 0;
+    while (i < numBytes) {
+
+        // store (tag name code -> start offset into tag data)
+        //
+        // The code is built from the unsigned bytes, exactly as TagOffset()
+        // builds its lookup key; going through (possibly signed) char would
+        // sign-extend bytes >= 0x80 and produce a different code.
+        const uint16_t tagNameCode = (static_cast<uint16_t>(tagStart[i]) << 8) | tagStart[i+1];
+        i += 2;
+        tagOffsets_[tagNameCode] = i;
+
+        // skip tag contents
+        const char tagType = static_cast<char>(tagStart[i++]);
+        switch (tagType) {
+            case 'A' :
+            case 'a' :
+            case 'c' :
+            case 'C' :
+            {
+                i += 1;
+                break;
+            }
+            case 's' :
+            case 'S' :
+            {
+                i += 2;
+                break;
+            }
+            case 'i' :
+            case 'I' :
+            case 'f' :
+            {
+                i += 4;
+                break;
+            }
+
+            case 'Z' :
+            case 'H' :
+            {
+                // null-terminated string
+                i += strlen((const char*)&tagStart[i]) + 1;
+                break;
+            }
+
+            case 'B' :
+            {
+                // array: element type code, 32-bit element count, elements
+                const char subTagType = tagStart[i++];
+                size_t elementSize = 0;
+                switch (subTagType) {
+                    case 'c' :
+                    case 'C' : elementSize = 1; break;
+                    case 's' :
+                    case 'S' : elementSize = 2; break;
+                    case 'i' :
+                    case 'I' :
+                    case 'f' : elementSize = 4; break;
+
+                    // unknown subTagType
+                    default:
+                        PB_ASSERT_OR_RETURN(false);
+                }
+
+                uint32_t numElements = 0;
+                memcpy(&numElements, &tagStart[i], sizeof(uint32_t));
+                i += (4 + (elementSize * numElements));
+                break;
+            }
+
+            // unknown tagType
+            default:
+                PB_ASSERT_OR_RETURN(false);
+        }
+    }
+}
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecordTags.cpp
+/// \brief Implements the BamRecordTags utility class.
+//
+// Author: Derek Barnett
+
+#include "BamRecordTags.h"
+#include "EnumClassHash.h"
+#include <unordered_map>
+#include <cassert>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+const BamRecordTags::TagLookupType BamRecordTags::tagLookup =
+{
+ // enum name label (returned by LabelFor) isPulse? (returned by IsPulse)
+ // --------- ----- --------
+ { BamRecordTag::ALT_LABEL_QV, {"pv", true} },
+ { BamRecordTag::ALT_LABEL_TAG, {"pt", true} },
+ { BamRecordTag::BARCODE_QUALITY, {"bq", false} },
+ { BamRecordTag::BARCODES, {"bc", false} },
+ { BamRecordTag::CONTEXT_FLAGS, {"cx", false} },
+ { BamRecordTag::DELETION_QV, {"dq", false} },
+ { BamRecordTag::DELETION_TAG, {"dt", false} },
+ { BamRecordTag::HOLE_NUMBER, {"zm", false} },
+ { BamRecordTag::INSERTION_QV, {"iq", false} },
+ { BamRecordTag::IPD, {"ip", false} },
+ { BamRecordTag::LABEL_QV, {"pq", true} },
+ { BamRecordTag::MERGE_QV, {"mq", false} },
+ { BamRecordTag::NUM_PASSES, {"np", false} },
+ { BamRecordTag::PKMEAN, {"pa", true} },
+ { BamRecordTag::PKMEAN_2, {"ps", true} },
+ { BamRecordTag::PKMID, {"pm", true} },
+ { BamRecordTag::PKMID_2, {"pi", true} },
+ { BamRecordTag::PRE_PULSE_FRAMES, {"pd", true} },
+ { BamRecordTag::PULSE_CALL, {"pc", true} },
+ { BamRecordTag::PULSE_CALL_WIDTH, {"px", true} },
+ { BamRecordTag::PULSE_MERGE_QV, {"pg", true} },
+ { BamRecordTag::PULSE_WIDTH, {"pw", false} }, // 'pulse' in the name; but stored per-base, not per-pulse
+ { BamRecordTag::QUERY_END, {"qe", false} },
+ { BamRecordTag::QUERY_START, {"qs", false} },
+ { BamRecordTag::READ_ACCURACY, {"rq", false} },
+ { BamRecordTag::READ_GROUP, {"RG", false} },
+ { BamRecordTag::SCRAP_REGION_TYPE, {"sc", false} },
+ { BamRecordTag::SCRAP_ZMW_TYPE, {"sz", false} },
+ { BamRecordTag::SNR, {"sn", false} },
+ { BamRecordTag::START_FRAME, {"sf", true} },
+ { BamRecordTag::SUBSTITUTION_QV, {"sq", false} },
+ { BamRecordTag::SUBSTITUTION_TAG, {"st", false} },
+
+ // faux tags: enum values given a blank label instead of a tag name
+ { BamRecordTag::SEQ, {" ", false} },
+ { BamRecordTag::QUAL, {" ", false} }
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamRecordTags.h
+/// \brief Defines the BamRecordTags utility class.
+//
+// Author: Derek Barnett
+
+#ifndef BAMRECORDTAGS_H
+#define BAMRECORDTAGS_H
+
+#include "pbbam/BamRecord.h"
+#include "pbbam/BamRecordImpl.h"
+#include "pbbam/BamRecordTag.h"
+#include "EnumClassHash.h"
+#include <string>
+#include <unordered_map>
+#include <cassert>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// Static lookup of per-tag information (label, pulse flag) keyed by
+/// BamRecordTag enum value.
+class BamRecordTags
+{
+public:
+    /// \returns the isPulse_ flag stored for \p tag
+    static inline bool IsPulse(const BamRecordTag tag);
+
+    /// \returns the label stored for \p tag
+    static inline std::string LabelFor(const BamRecordTag tag);
+
+private:
+    // information stored per tag
+    struct BamRecordTagData
+    {
+        const std::string label_;
+        const bool isPulse_;
+    };
+
+    using TagLookupType = std::unordered_map<BamRecordTag, BamRecordTagData, EnumClassHash>;
+
+    // table contents are defined out-of-line
+    static const TagLookupType tagLookup;
+};
+
+// Returns the isPulse_ flag stored in the lookup table for the given tag.
+// The tag is expected to be present in the table (checked by assert).
+inline bool BamRecordTags::IsPulse(const BamRecordTag tag)
+{
+    assert(tagLookup.find(tag) != tagLookup.cend());
+    const BamRecordTagData& info = tagLookup.at(tag);
+    return info.isPulse_;
+}
+
+// Returns a copy of the label stored in the lookup table for the given tag.
+// The tag is expected to be present in the table (checked by assert).
+inline std::string BamRecordTags::LabelFor(const BamRecordTag tag)
+{
+    assert(tagLookup.find(tag) != tagLookup.cend());
+    const BamRecordTagData& info = tagLookup.at(tag);
+    return info.label_;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // BAMRECORDTAGS_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BamTagCodec.cpp
+/// \brief Implements the BamTagCodec class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BamTagCodec.h"
+#include "AssertUtils.h"
+#include <htslib/kstring.h>
+#include <cstring>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Appends the raw bytes of a single value to the kstring buffer.
+template<typename T>
+inline void appendBamValue(const T& value, kstring_t* str)
+{
+    const char* rawBytes = reinterpret_cast<const char*>(&value);
+    kputsn_(rawBytes, sizeof(T), str);
+}
+
+// Appends an array to the kstring buffer: a 32-bit element count followed by
+// the raw bytes of the elements. An empty container appends only the count.
+template<typename T>
+inline void appendBamMultiValue(const vector<T>& container, kstring_t* str)
+{
+    const uint32_t n = container.size();
+    kputsn_(&n, sizeof(n), str);
+
+    // nothing to copy for an empty container; this also avoids taking the
+    // address of a non-existent first element
+    if (n > 0)
+        kputsn_(container.data(), n*sizeof(T), str);
+}
+
+// Reads one value of type T from src at the given byte offset and advances
+// the offset past it.
+template<typename T>
+inline T readBamValue(const uint8_t* src, size_t& offset)
+{
+    T result;
+    memcpy(&result, src + offset, sizeof(T));
+    offset += sizeof(T);
+    return result;
+}
+
+// Reads an array from src at the given byte offset: a 32-bit element count
+// followed by that many values of type T. Advances the offset past the array.
+template<typename T>
+vector<T> readBamMultiValue(const uint8_t* src, size_t& offset)
+{
+    uint32_t count;
+    memcpy(&count, src + offset, sizeof(count));
+    offset += sizeof(count);
+
+    vector<T> values;
+    values.reserve(count);
+    for (uint32_t k = 0; k < count; ++k)
+        values.push_back(readBamValue<T>(src, offset));
+    return values;
+}
+
+// Decodes raw tag data into a TagCollection.
+//
+// The input is read as a sequence of entries, each made of a 2-character
+// name, a 1-character type code and the type-dependent value. If an unknown
+// type (or array element type) is met, PB_ASSERT_OR_RETURN_VALUE is invoked
+// with an empty collection as the return value.
+TagCollection BamTagCodec::Decode(const vector<uint8_t>& data)
+{
+    TagCollection tags;
+
+    // NOTE: not completely safe - no real bounds-checking yet on input data
+
+    const uint8_t* const bytes = data.data();
+    const size_t totalBytes = data.size();
+    size_t pos = 0;
+    while (pos < totalBytes) {
+
+        // 2-character tag name, then the type code
+        const string tagName(reinterpret_cast<const char*>(bytes + pos), 2);
+        pos += 2;
+        const char tagType = static_cast<char>(bytes[pos++]);
+
+        Tag& tag = tags[tagName];
+        switch (tagType) {
+
+            // single ASCII character
+            case 'A' :
+            case 'a' :
+                tag = readBamValue<uint8_t>(bytes, pos);
+                tag.Modifier(TagModifier::ASCII_CHAR);
+                break;
+
+            // single numeric values
+            case 'c' : tag = readBamValue<int8_t>(bytes, pos); break;
+            case 'C' : tag = readBamValue<uint8_t>(bytes, pos); break;
+            case 's' : tag = readBamValue<int16_t>(bytes, pos); break;
+            case 'S' : tag = readBamValue<uint16_t>(bytes, pos); break;
+            case 'i' : tag = readBamValue<int32_t>(bytes, pos); break;
+            case 'I' : tag = readBamValue<uint32_t>(bytes, pos); break;
+            case 'f' : tag = readBamValue<float>(bytes, pos); break;
+
+            // null-terminated string ('H' marks it as a hex string)
+            case 'Z' :
+            case 'H' :
+            {
+                const string value(reinterpret_cast<const char*>(bytes + pos));
+                tag = value;
+                if (tagType == 'H')
+                    tag.Modifier(TagModifier::HEX_STRING);
+                pos += value.size() + 1; // +1 skips the terminator
+                break;
+            }
+
+            // array: element type code, then count + elements
+            case 'B' :
+            {
+                const char subTagType = bytes[pos++];
+                switch (subTagType) {
+                    case 'c' : tag = readBamMultiValue<int8_t>(bytes, pos); break;
+                    case 'C' : tag = readBamMultiValue<uint8_t>(bytes, pos); break;
+                    case 's' : tag = readBamMultiValue<int16_t>(bytes, pos); break;
+                    case 'S' : tag = readBamMultiValue<uint16_t>(bytes, pos); break;
+                    case 'i' : tag = readBamMultiValue<int32_t>(bytes, pos); break;
+                    case 'I' : tag = readBamMultiValue<uint32_t>(bytes, pos); break;
+                    case 'f' : tag = readBamMultiValue<float>(bytes, pos); break;
+
+                    // unknown subTagType
+                    default:
+                        PB_ASSERT_OR_RETURN_VALUE(false, TagCollection());
+                }
+                break;
+            }
+
+            // unknown tagType
+            default:
+                PB_ASSERT_OR_RETURN_VALUE(false, TagCollection());
+        }
+    }
+
+    return tags;
+}
+
+// Encodes a TagCollection into raw tag data.
+//
+// Each non-null tag is written as its 2-character name, a type code and the
+// type-dependent value. Null tags are skipped, as are entries whose name is
+// not exactly 2 characters (via PB_ASSERT_OR_CONTINUE). An unsupported tag
+// type invokes PB_ASSERT_OR_RETURN_VALUE with an empty vector as the return
+// value. An empty result is returned when nothing was encoded.
+vector<uint8_t> BamTagCodec::Encode(const TagCollection& tags)
+{
+    // temp raw data destination (for use with htslib methods)
+    kstring_t str = { 0, 0, NULL };
+
+    for (const auto& entry : tags) {
+        const string& name = entry.first;
+        const Tag& tag = entry.second;
+        PB_ASSERT_OR_CONTINUE(name.size() == 2);
+        if (tag.IsNull())
+            continue;
+
+        // "<TAG>:"
+        kputsn_(name.c_str(), 2, &str);
+
+        // "<TYPE>:<DATA>" for printable, ASCII char
+        // (if ToAscii() yields '\0', fall through to the value's own type)
+        if (tag.HasModifier(TagModifier::ASCII_CHAR)) {
+            char c = tag.ToAscii();
+            if (c != '\0') {
+                kputc_('A', &str);
+                kputc_(c, &str);
+                continue;
+            }
+        }
+
+        // "<TYPE>:<DATA>" for all other data
+        switch ( tag.Type() ) {
+
+            // single, numeric values
+            case TagDataType::INT8 :
+                kputc_('c', &str);
+                appendBamValue(tag.ToInt8(), &str);
+                break;
+            case TagDataType::UINT8 :
+                kputc_('C', &str);
+                appendBamValue(tag.ToUInt8(), &str);
+                break;
+            case TagDataType::INT16 :
+                kputc_('s', &str);
+                appendBamValue(tag.ToInt16(), &str);
+                break;
+            case TagDataType::UINT16 :
+                kputc_('S', &str);
+                appendBamValue(tag.ToUInt16(), &str);
+                break;
+            case TagDataType::INT32 :
+                kputc_('i', &str);
+                appendBamValue(tag.ToInt32(), &str);
+                break;
+            case TagDataType::UINT32 :
+                kputc_('I', &str);
+                appendBamValue(tag.ToUInt32(), &str);
+                break;
+            case TagDataType::FLOAT :
+                kputc_('f', &str);
+                appendBamValue(tag.ToFloat(), &str);
+                break;
+
+            // string ('H' for hex strings, 'Z' otherwise)
+            case TagDataType::STRING :
+            {
+                if (tag.HasModifier(TagModifier::HEX_STRING))
+                    kputc_('H', &str);
+                else
+                    kputc_('Z', &str);
+                const string& s = tag.ToString();
+                kputsn_(s.c_str(), s.size()+1, &str); // this adds the null-term
+                break;
+            }
+
+            // arrays: 'B', element type code, then count + elements
+            case TagDataType::INT8_ARRAY :
+                kputc_('B', &str);
+                kputc_('c', &str);
+                appendBamMultiValue(tag.ToInt8Array(), &str);
+                break;
+            case TagDataType::UINT8_ARRAY :
+                kputc_('B', &str);
+                kputc_('C', &str);
+                appendBamMultiValue(tag.ToUInt8Array(), &str);
+                break;
+            case TagDataType::INT16_ARRAY :
+                kputc_('B', &str);
+                kputc_('s', &str);
+                appendBamMultiValue(tag.ToInt16Array(), &str);
+                break;
+            case TagDataType::UINT16_ARRAY :
+                kputc_('B', &str);
+                kputc_('S', &str);
+                appendBamMultiValue(tag.ToUInt16Array(), &str);
+                break;
+            case TagDataType::INT32_ARRAY :
+                kputc_('B', &str);
+                kputc_('i', &str);
+                appendBamMultiValue(tag.ToInt32Array(), &str);
+                break;
+            case TagDataType::UINT32_ARRAY :
+                kputc_('B', &str);
+                kputc_('I', &str);
+                appendBamMultiValue(tag.ToUInt32Array(), &str);
+                break;
+            case TagDataType::FLOAT_ARRAY :
+                kputc_('B', &str);
+                kputc_('f', &str);
+                appendBamMultiValue(tag.ToFloatArray(), &str);
+                break;
+
+            // unsupported tag type
+            default :
+                free(str.s);
+                PB_ASSERT_OR_RETURN_VALUE(false, vector<uint8_t>());
+        }
+    }
+
+    // copy out of the temp buffer; when nothing was written the buffer may
+    // still be NULL, so there is nothing to copy and no element to address
+    vector<uint8_t> result;
+    if (str.l > 0) {
+        result.resize(str.l);
+        memcpy(result.data(), str.s, str.l);
+    }
+    free(str.s);
+    return result;
+}
+
+// Decodes a single tag value from raw data that starts at the tag's type
+// code (i.e. just past the 2-character name).
+//
+// An unknown type (or array element type) invokes PB_ASSERT_OR_RETURN_VALUE
+// with a null Tag as the return value.
+Tag BamTagCodec::FromRawData(uint8_t* rawData)
+{
+    const char tagType = static_cast<char>(rawData[0]);
+    const uint8_t* payload = rawData + 1; // value bytes follow the type code
+    size_t offset = 0;
+
+    switch (tagType) {
+
+        // single ASCII character
+        case 'A' :
+        case 'a' :
+        {
+            Tag asciiTag(readBamValue<uint8_t>(payload, offset));
+            asciiTag.Modifier(TagModifier::ASCII_CHAR);
+            return asciiTag;
+        }
+
+        // single numeric values
+        case 'c' : return Tag(readBamValue<int8_t>(payload, offset));
+        case 'C' : return Tag(readBamValue<uint8_t>(payload, offset));
+        case 's' : return Tag(readBamValue<int16_t>(payload, offset));
+        case 'S' : return Tag(readBamValue<uint16_t>(payload, offset));
+        case 'i' : return Tag(readBamValue<int32_t>(payload, offset));
+        case 'I' : return Tag(readBamValue<uint32_t>(payload, offset));
+        case 'f' : return Tag(readBamValue<float>(payload, offset));
+
+        // null-terminated string ('H' marks it as a hex string)
+        case 'Z' :
+        case 'H' :
+        {
+            const string value(reinterpret_cast<const char*>(payload));
+            Tag stringTag(value);
+            if (tagType == 'H')
+                stringTag.Modifier(TagModifier::HEX_STRING);
+            return stringTag;
+        }
+
+        // array: element type code, then count + elements
+        case 'B' :
+        {
+            const char subTagType = payload[0];
+            const uint8_t* elements = payload + 1;
+            switch (subTagType) {
+
+                case 'c' : return Tag(readBamMultiValue<int8_t>(elements, offset));
+                case 'C' : return Tag(readBamMultiValue<uint8_t>(elements, offset));
+                case 's' : return Tag(readBamMultiValue<int16_t>(elements, offset));
+                case 'S' : return Tag(readBamMultiValue<uint16_t>(elements, offset));
+                case 'i' : return Tag(readBamMultiValue<int32_t>(elements, offset));
+                case 'I' : return Tag(readBamMultiValue<uint32_t>(elements, offset));
+                case 'f' : return Tag(readBamMultiValue<float>(elements, offset));
+
+                // unknown subTagType
+                default:
+                    PB_ASSERT_OR_RETURN_VALUE(false, Tag());
+            }
+            break;
+        }
+
+        // unknown tagType
+        default:
+            PB_ASSERT_OR_RETURN_VALUE(false, Tag());
+    }
+    return Tag(); // to avoid compiler warning
+}
+
+/// \brief Encodes the value of a single tag as raw BAM bytes.
+///
+/// No leading tag type code is written here. Scalars are written as their
+/// value only, strings include the terminating NUL, and arrays are prefixed
+/// with their one-byte element type code.
+///
+/// \param[in] tag                 tag whose value is encoded
+/// \param[in] additionalModifier  ASCII_CHAR forces the single-character
+///                                encoding even if \p tag lacks that modifier
+///
+/// \returns the encoded bytes; empty when nothing was written (e.g. an ASCII
+///          tag whose character is '\0'). An unsupported tag type is routed
+///          through PB_ASSERT_OR_RETURN_VALUE with an empty vector.
+///
+vector<uint8_t> BamTagCodec::ToRawData(const Tag& tag,
+                                       const TagModifier& additionalModifier)
+{
+    // temp raw data destination (for use with htslib methods)
+    kstring_t str = { 0, 0, NULL };
+
+    // "<TYPE>:<DATA>" for printable, ASCII char
+    if (tag.HasModifier(TagModifier::ASCII_CHAR) || additionalModifier == TagModifier::ASCII_CHAR) {
+        const char c = tag.ToAscii();
+        if (c != '\0')
+            kputc_(c, &str);
+    }
+
+    // for all others
+    else {
+        switch (tag.Type()) {
+
+            // single, numeric values
+            case TagDataType::INT8   : appendBamValue(tag.ToInt8(),   &str); break;
+            case TagDataType::UINT8  : appendBamValue(tag.ToUInt8(),  &str); break;
+            case TagDataType::INT16  : appendBamValue(tag.ToInt16(),  &str); break;
+            case TagDataType::UINT16 : appendBamValue(tag.ToUInt16(), &str); break;
+            case TagDataType::INT32  : appendBamValue(tag.ToInt32(),  &str); break;
+            case TagDataType::UINT32 : appendBamValue(tag.ToUInt32(), &str); break;
+            case TagDataType::FLOAT  : appendBamValue(tag.ToFloat(),  &str); break;
+
+            // string & hex-string values
+            case TagDataType::STRING :
+            {
+                const string& s = tag.ToString();
+                kputsn_(s.c_str(), s.size()+1, &str); // this adds the null-term
+                break;
+            }
+
+            // array-type values
+            case TagDataType::INT8_ARRAY :
+            {
+                kputc_('c', &str);
+                appendBamMultiValue(tag.ToInt8Array(), &str);
+                break;
+            }
+            case TagDataType::UINT8_ARRAY :
+            {
+                kputc_('C', &str);
+                appendBamMultiValue(tag.ToUInt8Array(), &str);
+                break;
+            }
+            case TagDataType::INT16_ARRAY :
+            {
+                kputc_('s', &str);
+                appendBamMultiValue(tag.ToInt16Array(), &str);
+                break;
+            }
+            case TagDataType::UINT16_ARRAY :
+            {
+                kputc_('S', &str);
+                appendBamMultiValue(tag.ToUInt16Array(), &str);
+                break;
+            }
+            case TagDataType::INT32_ARRAY :
+            {
+                kputc_('i', &str);
+                appendBamMultiValue(tag.ToInt32Array(), &str);
+                break;
+            }
+            case TagDataType::UINT32_ARRAY :
+            {
+                kputc_('I', &str);
+                appendBamMultiValue(tag.ToUInt32Array(), &str);
+                break;
+            }
+            case TagDataType::FLOAT_ARRAY :
+            {
+                kputc_('f', &str);
+                appendBamMultiValue(tag.ToFloatArray(), &str);
+                break;
+            }
+
+            // unsupported tag type
+            default :
+                free(str.s);
+                PB_ASSERT_OR_RETURN_VALUE(false, vector<uint8_t>());
+        }
+    }
+
+    // store temp contents in actual destination
+    //
+    // Skip the copy when nothing was written: str.s is still NULL in that case,
+    // and taking &result[0] of an empty vector is undefined behavior.
+    vector<uint8_t> result(str.l);
+    if (str.l > 0)
+        memcpy(result.data(), str.s, str.l);
+    free(str.s);
+    return result;
+}
+
+/// \brief Determines the one-byte BAM type code for a tag.
+///
+/// \param[in] tag                 tag to inspect
+/// \param[in] additionalModifier  extra modifier to honor (ASCII_CHAR or
+///                                HEX_STRING) in addition to the tag's own
+///
+/// \returns 'A' for ASCII characters (integer tags in the printable range
+///          33-126 only), 'c'/'C'/'s'/'S'/'i'/'I'/'f' for scalars, 'H' or 'Z'
+///          for strings, and 'B' for every array type. Any other input is
+///          routed through PB_ASSERT_OR_RETURN_VALUE with 0.
+///
+uint8_t BamTagCodec::TagTypeCode(const Tag& tag,
+                                 const TagModifier& additionalModifier)
+{
+    const bool asAscii = tag.HasModifier(TagModifier::ASCII_CHAR) ||
+                         additionalModifier == TagModifier::ASCII_CHAR;
+    const auto type = tag.Type();
+
+    if (asAscii) {
+
+        // widen whichever integer the tag holds, so one range check fits all
+        int64_t code = 0;
+        switch (type) {
+            case TagDataType::INT8   : code = tag.ToInt8();   break;
+            case TagDataType::UINT8  : code = tag.ToUInt8();  break;
+            case TagDataType::INT16  : code = tag.ToInt16();  break;
+            case TagDataType::UINT16 : code = tag.ToUInt16(); break;
+            case TagDataType::INT32  : code = tag.ToInt32();  break;
+            case TagDataType::UINT32 : code = tag.ToUInt32(); break;
+            default:
+                // non integers not allowed
+                PB_ASSERT_OR_RETURN_VALUE(false, 0);
+        }
+
+        // printable range
+        PB_ASSERT_OR_RETURN_VALUE(code >= 33,  0);
+        PB_ASSERT_OR_RETURN_VALUE(code <= 126, 0);
+        return 'A';
+    }
+
+    switch (type) {
+
+        // scalars
+        case TagDataType::INT8   : return 'c';
+        case TagDataType::UINT8  : return 'C';
+        case TagDataType::INT16  : return 's';
+        case TagDataType::UINT16 : return 'S';
+        case TagDataType::INT32  : return 'i';
+        case TagDataType::UINT32 : return 'I';
+        case TagDataType::FLOAT  : return 'f';
+
+        // strings: hex if either the tag or the caller says so
+        case TagDataType::STRING :
+        {
+            const bool isHex = tag.HasModifier(TagModifier::HEX_STRING) ||
+                               additionalModifier == TagModifier::HEX_STRING;
+            return static_cast<uint8_t>(isHex ? 'H' : 'Z');
+        }
+
+        // all arrays share a single type code
+        case TagDataType::INT8_ARRAY   : // fall through
+        case TagDataType::UINT8_ARRAY  : // .
+        case TagDataType::INT16_ARRAY  : // .
+        case TagDataType::UINT16_ARRAY : // .
+        case TagDataType::INT32_ARRAY  : // .
+        case TagDataType::UINT32_ARRAY : // .
+        case TagDataType::FLOAT_ARRAY  :
+            return 'B';
+
+        default:
+            PB_ASSERT_OR_RETURN_VALUE(false, 0);
+    }
+    return 0; // to avoid compiler warning
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "pbbam/BamWriter.h"
+#include "pbbam/BamFile.h"
+#include "pbbam/Validator.h"
+#include "AssertUtils.h"
+#include "FileProducer.h"
+#include "MemoryUtils.h"
+#include <htslib/bgzf.h>
+#include <htslib/hfile.h>
+#include <htslib/hts.h>
+#include <iostream>
+#include <thread>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// Implementation behind BamWriter: holds the htslib output handle and raw
+/// header, and performs the actual record writes.
+class BamWriterPrivate : public internal::FileProducer
+{
+public:
+    // construction
+    BamWriterPrivate(const std::string& filename,
+                     const PBBAM_SHARED_PTR<bam_hdr_t> rawHeader,
+                     const BamWriter::CompressionLevel compressionLevel,
+                     const size_t numThreads,
+                     const BamWriter::BinCalculationMode binCalculationMode);
+
+    // record output
+    void Write(const BamRecord& record);
+    void Write(const BamRecord& record, int64_t* vOffset);
+    void Write(const BamRecordImpl& recordImpl);
+
+    // data members (declaration order is also the initialization order)
+    bool calculateBins_;
+    std::unique_ptr<samFile, internal::HtslibFileDeleter> file_;
+    PBBAM_SHARED_PTR<bam_hdr_t> header_;
+};
+
+/// Opens the output file, configures htslib threading and writes the header.
+///
+/// \param[in] filename            passed on to the FileProducer base
+/// \param[in] rawHeader           htslib header to write; must not be null
+/// \param[in] compressionLevel    appended (as an integer) to the "wb" open mode
+/// \param[in] numThreads          0 means "use std::thread::hardware_concurrency()"
+/// \param[in] binCalculationMode  BinCalculation_ON enables bin calculation on write
+///
+/// \throws std::runtime_error if the header is null, the file cannot be opened,
+///         or the header cannot be written
+///
+BamWriterPrivate::BamWriterPrivate(const string& filename,
+                                   const PBBAM_SHARED_PTR<bam_hdr_t> rawHeader,
+                                   const BamWriter::CompressionLevel compressionLevel,
+                                   const size_t numThreads,
+                                   const BamWriter::BinCalculationMode binCalculationMode)
+    : internal::FileProducer(filename)
+    , calculateBins_(binCalculationMode == BamWriter::BinCalculation_ON)
+    , file_(nullptr)
+    , header_(rawHeader)
+{
+    if (!header_)
+        throw std::runtime_error("null header");
+
+    // open file (the producer's temp filename is the one actually written to)
+    const string& outputFn = TempFilename();
+    string openMode("wb");
+    openMode += to_string(static_cast<int>(compressionLevel));
+    file_.reset(sam_open(outputFn.c_str(), openMode.c_str()));
+    if (!file_)
+        throw std::runtime_error("could not open file for writing");
+
+    // resolve thread count: explicit request, else built-in check, else 1
+    size_t threadCount = numThreads;
+    if (threadCount == 0)
+        threadCount = thread::hardware_concurrency();
+    if (threadCount == 0)
+        threadCount = 1;
+
+    // if multithreading requested, enable it
+    if (threadCount > 1)
+        hts_set_threads(file_.get(), threadCount);
+
+    // write header
+    if (sam_hdr_write(file_.get(), header_.get()) != 0)
+        throw std::runtime_error("could not write header");
+}
+
+/// Writes one record to the output file.
+///
+/// When bin calculation is enabled, the record's bin field is recomputed from
+/// its position and end position before writing.
+///
+/// \throws std::runtime_error if sam_write1 reports a result <= 0
+///
+void BamWriterPrivate::Write(const BamRecord& record)
+{
+#if PBBAM_AUTOVALIDATE
+    Validator::Validate(record);
+#endif
+
+    const auto rawRecord = internal::BamRecordMemory::GetRawData(record);
+    auto* const b = rawRecord.get();
+
+    // (probably) store bins
+    // min_shift=14 & n_lvls=5 are BAM "magic numbers"
+    if (calculateBins_) {
+        const auto endPos = bam_endpos(b);
+        b->core.bin = hts_reg2bin(b->core.pos, endPos, 14, 5);
+    }
+
+    // write record to file
+    if (sam_write1(file_.get(), header_.get(), b) <= 0)
+        throw std::runtime_error("could not write record");
+}
+
+/// Writes one record and reports the BGZF virtual offset it was written at.
+///
+/// The output buffer is flushed first, so that the file position and block
+/// offset describe where the record will start.
+///
+/// \param[in]  record   record to write
+/// \param[out] vOffset  receives (file position << 16) | block offset; must
+///                      not be null
+///
+/// \throws std::runtime_error if the flush or the record write fails
+///
+void BamWriterPrivate::Write(const BamRecord& record, int64_t* vOffset)
+{
+    assert(vOffset);
+    BGZF* bgzf = file_.get()->fp.bgzf;
+    assert(bgzf);
+
+    // ensure offsets up-to-date; after a failed flush the captured offset
+    // would not describe where the record lands
+    if (bgzf_flush(bgzf) != 0)
+        throw std::runtime_error("could not flush output buffer contents");
+
+    // capture virtual offset where we're about to write
+    // (widen to 64 bits before shifting, off_t may be narrower on some platforms)
+    const int64_t rawTell = static_cast<int64_t>(htell(bgzf->fp));
+    const int64_t blockOffset = bgzf->block_offset;
+    *vOffset = (rawTell << 16) | blockOffset;
+
+    // now write data
+    Write(record);
+}
+
+/// Writes a low-level record by wrapping it in a BamRecord and reusing the
+/// BamRecord write path.
+inline void BamWriterPrivate::Write(const BamRecordImpl& recordImpl)
+{
+    const BamRecord record(recordImpl);
+    Write(record);
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+/// Creates a writer for \p filename; the raw htslib header is built from
+/// \p header and all remaining settings are handed to the private implementation.
+/// With PBBAM_AUTOVALIDATE enabled, the header is validated first.
+BamWriter::BamWriter(const std::string& filename,
+                     const BamHeader& header,
+                     const BamWriter::CompressionLevel compressionLevel,
+                     const size_t numThreads,
+                     const BinCalculationMode binCalculationMode)
+    : IRecordWriter()
+    , d_(nullptr)
+{
+#if PBBAM_AUTOVALIDATE
+    Validator::Validate(header);
+#endif
+
+    d_.reset(new internal::BamWriterPrivate(filename,
+                                            internal::BamHeaderMemory::MakeRawHeader(header),
+                                            compressionLevel,
+                                            numThreads,
+                                            binCalculationMode));
+}
+
+/// Flushes any buffered output; the flush result is not inspected here.
+BamWriter::~BamWriter(void)
+{
+    BGZF* const bgzf = d_->file_->fp.bgzf;
+    bgzf_flush(bgzf);
+}
+
+/// Flushes buffered output to the file.
+///
+/// \throws std::runtime_error if bgzf_flush returns non-zero
+///
+void BamWriter::TryFlush(void)
+{
+    // TODO: sanity checks on file_ & fp
+    BGZF* const bgzf = d_->file_->fp.bgzf;
+    if (bgzf_flush(bgzf) != 0)
+        throw std::runtime_error("could not flush output buffer contents");
+}
+
+// The public Write overloads simply forward to the private implementation.
+
+void BamWriter::Write(const BamRecord& record)
+{
+    d_->Write(record);
+}
+
+void BamWriter::Write(const BamRecord& record, int64_t* vOffset)
+{
+    d_->Write(record, vOffset);
+}
+
+void BamWriter::Write(const BamRecordImpl& recordImpl)
+{
+    d_->Write(recordImpl);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file BarcodeQuery.cpp
+/// \brief Implements the BarcodeQuery class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/BarcodeQuery.h"
+#include "pbbam/PbiFilterTypes.h"
+#include "pbbam/CompositeBamReader.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+/// Private implementation for BarcodeQuery: owns a composite reader that is
+/// filtered by barcode.
+struct BarcodeQuery::BarcodeQueryPrivate
+{
+    // Compare::None => unsorted
+    PbiFilterCompositeBamReader<Compare::None> reader_;
+
+    BarcodeQueryPrivate(const int16_t barcode, const DataSet& dataset)
+        : reader_(PbiBarcodeFilter(barcode), dataset)
+    { }
+};
+
+/// Sets up a query over \p dataset for records matching \p barcode.
+BarcodeQuery::BarcodeQuery(const int16_t barcode, const DataSet& dataset)
+    : internal::IQuery()
+    , d_(new BarcodeQueryPrivate(barcode, dataset))
+{
+}
+
+// Defined here, in the same file as BarcodeQueryPrivate; nothing extra to do.
+BarcodeQuery::~BarcodeQuery(void)
+{
+}
+
+/// Fetches the next matching record into \p r; returns whatever the underlying
+/// filtered reader reports.
+bool BarcodeQuery::GetNext(BamRecord &r)
+{
+    const bool gotRecord = d_->reader_.GetNext(r);
+    return gotRecord;
+}
--- /dev/null
+
+# grab library source files
+include(files.cmake)
+set(SOURCES
+ ${PacBioBAM_H}
+ ${PacBioBAM_CPP}
+)
+# append the project's compiler flags to the global C++ flags
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PacBioBAM_CXX_FLAGS}")
+
+# define actual library
+add_library(pbbam ${SOURCES})
+
+# library properties
+target_compile_definitions(pbbam
+ PRIVATE "-DPBBAM_LIBRARY"
+)
+# place archive, runtime & library outputs in the project's library directory
+set_target_properties(pbbam PROPERTIES
+ ARCHIVE_OUTPUT_DIRECTORY ${PacBioBAM_LibDir}
+ RUNTIME_OUTPUT_DIRECTORY ${PacBioBAM_LibDir}
+ LIBRARY_OUTPUT_DIRECTORY ${PacBioBAM_LibDir}
+)
+
+if(PacBioBAM_wrap_r)
+ # SWIG R does not support std::shared_ptr, but it does support boost::shared_ptr
+ # So force boost if we're wrapping for R.
+ target_compile_definitions(pbbam
+ PUBLIC -DPBBAM_USE_BOOST_SHARED_PTR
+ )
+endif()
+
+# optionally define PBBAM_AUTOVALIDATE=1 for pbbam and its consumers
+if(PacBioBAM_auto_validate)
+ target_compile_definitions(pbbam
+ PUBLIC "-DPBBAM_AUTOVALIDATE=1"
+ )
+endif()
+
+# pbbam includes
+target_include_directories(pbbam
+ PUBLIC
+ ${PacBioBAM_IncludeDir}
+ ${HTSLIB_INCLUDE_DIRS}
+ ${Boost_INCLUDE_DIRS}
+ ${ZLIB_INCLUDE_DIRS}
+)
+
+# set link dependencies
+# if htslib provided externally
+if(HTSLIB_LIBRARIES)
+ set(pbbam_all_dependency_libs
+ ${HTSLIB_LIBRARIES}
+ ${ZLIB_LIBRARIES}
+ ${SOCKET_LIBRARIES}
+ ${CMAKE_THREAD_LIBS_INIT}
+ )
+# otherwise, use the "in-project" htslib target
+else()
+ set(pbbam_all_dependency_libs
+ $<TARGET_FILE:hts>
+ ${ZLIB_LIBRARIES}
+ ${SOCKET_LIBRARIES}
+ ${CMAKE_THREAD_LIBS_INIT}
+ )
+endif()
+
+# link pbbam (and, via PUBLIC, its consumers) against the dependency list
+target_link_libraries(pbbam
+ PUBLIC
+ ${pbbam_all_dependency_libs}
+)
+
+# define include paths for projects that use pbbam
+set(PacBioBAM_INCLUDE_DIRS
+ ${PacBioBAM_IncludeDir}
+ ${HTSLIB_INCLUDE_DIRS}
+ ${Boost_INCLUDE_DIRS}
+ ${ZLIB_INCLUDE_DIRS}
+ CACHE INTERNAL
+ "${PROJECT_NAME}: Include Directories"
+ FORCE
+)
+# define link libraries for projects that use pbbam
+set(PacBioBAM_LIBRARIES
+ $<TARGET_FILE:pbbam>
+ ${pbbam_all_dependency_libs}
+ CACHE INTERNAL
+ "${PROJECT_NAME}: Libraries"
+ FORCE
+)
+
+# add SWIG directory
+add_subdirectory(swig)
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Lance Hepler
+
+#include "ChemistryTable.h"
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Table of known chemistry combinations. Each row is four strings, in the
+// column order given by the header comment below.
+extern const std::vector<std::array<std::string, 4>> ChemistryTable = {
+
+ // BindingKit, SequencingKit, BasecallerVersion, Chemistry
+
+ // RS
+ {{"100356300", "100356200", "2.1", "P6-C4"}},
+ {{"100356300", "100356200", "2.3", "P6-C4"}},
+ {{"100356300", "100612400", "2.1", "P6-C4"}},
+ {{"100356300", "100612400", "2.3", "P6-C4"}},
+ {{"100372700", "100356200", "2.1", "P6-C4"}},
+ {{"100372700", "100356200", "2.3", "P6-C4"}},
+ {{"100372700", "100612400", "2.1", "P6-C4"}},
+ {{"100372700", "100612400", "2.3", "P6-C4"}},
+
+ // 3.0 ("Dromedary"): S/P1-C1/beta
+ {{"100-619-300", "100-620-000", "3.0", "S/P1-C1/beta"}},
+ {{"100-619-300", "100-620-000", "3.1", "S/P1-C1/beta"}},
+
+ // 3.1 ("Echidna"): S/P1-C1.1
+ {{"100-619-300", "100-867-300", "3.1", "S/P1-C1.1"}},
+ {{"100-619-300", "100-867-300", "3.2", "S/P1-C1.1"}},
+ {{"100-619-300", "100-867-300", "3.3", "S/P1-C1.1"}},
+
+ // 3.1.1 ("Flea"): S/P1-C1.2
+ {{"100-619-300", "100-902-100", "3.1", "S/P1-C1.2"}},
+ {{"100-619-300", "100-902-100", "3.2", "S/P1-C1.2"}},
+ {{"100-619-300", "100-902-100", "3.3", "S/P1-C1.2"}},
+ {{"100-619-300", "100-902-100", "4.0", "S/P1-C1.2"}},
+
+ // 3.2 ("Goat"): S/P1-C1.3
+ {{"100-619-300", "100-972-200", "3.2", "S/P1-C1.3"}},
+ {{"100-619-300", "100-972-200", "3.3", "S/P1-C1.3"}},
+ {{"100-619-300", "100-972-200", "4.0", "S/P1-C1.3"}},
+
+ // 4.0 ("Seabiscuit"): S/P2-C2
+ {{"100-862-200", "100-861-800", "4.0", "S/P2-C2"}}
+
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Lance Hepler
+
+#ifndef CHEMISTRYTABLE_H
+#define CHEMISTRYTABLE_H
+
+#include <array>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// Table of known chemistry combinations, defined in ChemistryTable.cpp.
+/// Each row holds four strings.
+extern const std::vector<std::array<std::string, 4>> ChemistryTable;
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // CHEMISTRYTABLE_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Cigar.cpp
+/// \brief Implements the Cigar class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Cigar.h"
+#include <sstream>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+/// \brief Parses a CIGAR string (e.g. "10=1X5I") into its operations.
+///
+/// Every non-digit character is taken as an operation code, and the digits
+/// immediately preceding it as that operation's length. Trailing digits with
+/// no operation code after them are ignored.
+///
+/// \param[in] cigarString  text to parse
+///
+/// \throws std::invalid_argument (from std::stoul) if an operation code is
+///         not preceded by any digits
+/// \throws std::out_of_range (from std::stoul) if a length does not fit
+///
+Cigar::Cigar(const string& cigarString)
+    : vector<CigarOperation>()
+{
+    size_t numberStart = 0;
+    const size_t numChars = cigarString.size();
+    for (size_t i = 0; i < numChars; ++i) {
+        const char c = cigarString[i];
+
+        // isdigit() is only defined for values representable as unsigned char
+        // (or EOF); a plain char may be negative
+        if (isdigit(static_cast<unsigned char>(c)))
+            continue;
+
+        // reached an op code: everything since numberStart is its length
+        const size_t distance = i - numberStart;
+        const uint32_t length = stoul(cigarString.substr(numberStart, distance));
+        push_back(CigarOperation(c, length));
+        numberStart = i+1;
+    }
+}
+
+/// Renders the operations back into CIGAR text: for each operation, its
+/// length followed by its character code, concatenated in order.
+string Cigar::ToStdString(void) const
+{
+    stringstream out;
+    for (const CigarOperation& op : *this)
+        out << op.Length() << op.Char();
+    return out.str();
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file CigarOperation.cpp
+/// \brief Implements the CigarOperation class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/CigarOperation.h"
+#include <htslib/sam.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+/// Maps a CIGAR character code to its operation type.
+///
+/// \returns the matching CigarOperationType, or UNKNOWN_OP for any character
+///          that is not one of "MIDNSHP=X"
+///
+CigarOperationType CigarOperation::CharToType(const char c)
+{
+    switch (c) {
+        case 'M': return CigarOperationType::ALIGNMENT_MATCH;
+        case 'I': return CigarOperationType::INSERTION;
+        case 'D': return CigarOperationType::DELETION;
+        case 'N': return CigarOperationType::REFERENCE_SKIP;
+        case 'S': return CigarOperationType::SOFT_CLIP;
+        case 'H': return CigarOperationType::HARD_CLIP;
+        case 'P': return CigarOperationType::PADDING;
+        case '=': return CigarOperationType::SEQUENCE_MATCH;
+        case 'X': return CigarOperationType::SEQUENCE_MISMATCH;
+        default:
+            break;
+    }
+    return CigarOperationType::UNKNOWN_OP;
+}
+
+/// Maps an operation type to its CIGAR character, using htslib's
+/// bam_cigar_opchr on the type's integer value.
+char CigarOperation::TypeToChar(const CigarOperationType type)
+{
+    const int opCode = static_cast<int>(type);
+    return bam_cigar_opchr(opCode);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Compare.cpp
+/// \brief Implements the Compare class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Compare.h"
+#include <functional>
+#include <unordered_map>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// The textual spellings of one comparison type: its enum name, its symbolic
+/// operator, and its alphabetic operator.
+struct TypeAlias
+{
+    TypeAlias(const string& name = string(),
+              const string& op = string(),
+              const string& opAlpha = string())
+        : name_(name), op_(op), opAlpha_(opAlpha)
+    { }
+
+    string name_;     // e.g. "Compare::EQUAL"
+    string op_;       // e.g. "=="
+    string opAlpha_;  // e.g. "eq"
+};
+
+/// Hash functor so Compare::Type can be used as an unordered_map key: hashes
+/// the enum's integer value with std::hash<int>.
+struct CompareTypeHash
+{
+    size_t operator()(const Compare::Type& t) const
+    {
+        const int asInt = static_cast<int>(t);
+        return std::hash<int>()(asInt);
+    }
+};
+
+// Lookup from operator text to comparison type.
+//
+// Accepts the basic operators plus some permissiveness for other
+// representations: alphabetic names ("eq", "lt", ...) and XML-escaped forms
+// ("&lt;", "&gt;=", ...).
+//
+// NOTE(review): the escaped entries below replace rows that repeated the plain
+// "<", "<=", ">", ">=" keys (duplicates in an initializer list are silently
+// dropped, so those rows had no effect). They look like entity-escaped
+// spellings that were un-escaped at some point; confirm against upstream.
+static const unordered_map<string, Compare::Type> opToTypeMap =
+{
+    { "==",    Compare::EQUAL },
+    { "=",     Compare::EQUAL },
+    { "eq",    Compare::EQUAL },
+    { "!=",    Compare::NOT_EQUAL },
+    { "ne",    Compare::NOT_EQUAL },
+    { "<",     Compare::LESS_THAN },
+    { "lt",    Compare::LESS_THAN },
+    { "&lt;",  Compare::LESS_THAN },
+    { "<=",    Compare::LESS_THAN_EQUAL },
+    { "lte",   Compare::LESS_THAN_EQUAL },
+    { "&lt;=", Compare::LESS_THAN_EQUAL },
+    { ">",     Compare::GREATER_THAN },
+    { "gt",    Compare::GREATER_THAN },
+    { "&gt;",  Compare::GREATER_THAN },
+    { ">=",    Compare::GREATER_THAN_EQUAL },
+    { "gte",   Compare::GREATER_THAN_EQUAL },
+    { "&gt;=", Compare::GREATER_THAN_EQUAL },
+    { "&",     Compare::CONTAINS },
+    { "~",     Compare::NOT_CONTAINS }
+};
+
+// Lookup from comparison type to its spellings (enum name, symbolic operator,
+// alphabetic operator).
+static const unordered_map<Compare::Type, TypeAlias, CompareTypeHash> typeAliases =
+{
+    { Compare::EQUAL,              TypeAlias("Compare::EQUAL",              "==", "eq")  },
+    { Compare::NOT_EQUAL,          TypeAlias("Compare::NOT_EQUAL",          "!=", "ne")  },
+    { Compare::LESS_THAN,          TypeAlias("Compare::LESS_THAN",          "<",  "lt")  },
+    { Compare::LESS_THAN_EQUAL,    TypeAlias("Compare::LESS_THAN_EQUAL",    "<=", "lte") },
+    { Compare::GREATER_THAN,       TypeAlias("Compare::GREATER_THAN",       ">",  "gt")  },
+    { Compare::GREATER_THAN_EQUAL, TypeAlias("Compare::GREATER_THAN_EQUAL", ">=", "gte") },
+    { Compare::CONTAINS,           TypeAlias("Compare::CONTAINS",           "&",  "and") },
+    { Compare::NOT_CONTAINS,       TypeAlias("Compare::NOT_CONTAINS",       "~",  "not") }
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// Converts operator text (e.g. "==" or "lt") into its Compare::Type.
+// Throws std::runtime_error when the text is not a known operator.
+Compare::Type Compare::TypeFromOperator(const string& opString)
+{
+    const auto found = internal::opToTypeMap.find(opString);
+    if (found == internal::opToTypeMap.end())
+        throw std::runtime_error(opString + " is not a valid comparison operator." );
+    return found->second;
+}
+
+// Returns the enumerator spelling (e.g. "Compare::EQUAL") for a comparison type.
+// Throws std::runtime_error when the type has no entry in the alias table.
+string Compare::TypeToName(const Compare::Type& type)
+{
+    const auto found = internal::typeAliases.find(type);
+    if (found == internal::typeAliases.end())
+        throw std::runtime_error("invalid comparison type encountered" );
+    return found->second.name_;
+}
+
+// Returns the operator text for a comparison type: the alphabetic form
+// (e.g. "eq") when asAlpha is true, otherwise the symbolic form (e.g. "==").
+// Throws std::runtime_error when the type has no entry in the alias table.
+string Compare::TypeToOperator(const Compare::Type& type, bool asAlpha)
+{
+    const auto found = internal::typeAliases.find(type);
+    if (found == internal::typeAliases.end())
+        throw std::runtime_error("invalid comparison type encountered" );
+
+    const internal::TypeAlias& alias = found->second;
+    if (asAlpha)
+        return alias.opAlpha_;
+    return alias.op_;
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Config.cpp
+/// \brief Initializes global variable defaults.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Config.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+namespace PacBio {
+namespace BAM {
+
+// Global htslib verbosity setting.
+//
+// Initialized to -1 to indicate "default". Client code may set this or not.
+//
+// To respect a value set by client code, or else fall back to the default (OFF),
+// this value should be used like this:
+//
+//    hts_verbose = ( PacBio::BAM::HtslibVerbosity == -1 ? 0 : PacBio::BAM::HtslibVerbosity);
+//
+int HtslibVerbosity = -1;
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file DataSet.cpp
+/// \brief Implements the DataSet class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/DataSet.h"
+#include "pbbam/DataSetTypes.h"
+#include "pbbam/internal/DataSetBaseTypes.h"
+#include "DataSetIO.h"
+#include "FileUtils.h"
+#include "TimeUtils.h"
+#include <boost/algorithm/string.hpp>
+#include <unordered_map>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Version string given to datasets that do not carry one.
+static const string defaultVersion{ "4.0.0" };
+
+// Fills in the 'CreatedAt' and 'Version' attributes when the dataset has no
+// value for them (e.g. they were absent from the XML). Existing values are kept.
+static inline void InitDefaults(DataSet& ds)
+{
+    const bool needsTimestamp = ds.CreatedAt().empty();
+    if (needsTimestamp)
+        ds.CreatedAt(internal::ToIso8601(CurrentTime()));
+
+    const bool needsVersion = ds.Version().empty();
+    if (needsVersion)
+        ds.Version(internal::defaultVersion);
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// Creates an empty, generic dataset.
+//
+// Delegates to DataSet(DataSet::GENERIC). That constructor already applies the
+// default 'CreatedAt' & 'Version' attributes via InitDefaults(), so repeating
+// the call here would be redundant.
+DataSet::DataSet(void)
+    : DataSet(DataSet::GENERIC)
+{ }
+
+// Creates an empty dataset of the requested type, rooted at the current
+// working directory. Throws std::runtime_error for an unrecognized type.
+DataSet::DataSet(const DataSet::TypeEnum type)
+    : d_(nullptr)
+    , path_(FileUtils::CurrentWorkingDirectory())
+{
+    // instantiate the concrete dataset class matching 'type'
+    if      (type == DataSet::GENERIC)             d_.reset(new DataSetBase);
+    else if (type == DataSet::ALIGNMENT)           d_.reset(new AlignmentSet);
+    else if (type == DataSet::BARCODE)             d_.reset(new BarcodeSet);
+    else if (type == DataSet::CONSENSUS_ALIGNMENT) d_.reset(new ConsensusAlignmentSet);
+    else if (type == DataSet::CONSENSUS_READ)      d_.reset(new ConsensusReadSet);
+    else if (type == DataSet::CONTIG)              d_.reset(new ContigSet);
+    else if (type == DataSet::HDF_SUBREAD)         d_.reset(new HdfSubreadSet);
+    else if (type == DataSet::REFERENCE)           d_.reset(new ReferenceSet);
+    else if (type == DataSet::SUBREAD)             d_.reset(new SubreadSet);
+    else
+        throw std::runtime_error("unsupported dataset type"); // unknown type
+
+    InitDefaults(*this);
+}
+
+// Creates a dataset from a single BAM file, loaded by its filename.
+// The current working directory is used as the starting path.
+DataSet::DataSet(const BamFile& bamFile)
+    : d_(DataSetIO::FromUri(bamFile.Filename())),
+      path_(FileUtils::CurrentWorkingDirectory())
+{
+    InitDefaults(*this);
+}
+
+// Creates a dataset from a file; the loader is chosen by DataSetIO::FromUri.
+//
+// The starting path for resolving resources is normally the directory that
+// holds 'filename'. FOFN and raw BAM inputs are the exception: they use the
+// current working directory (any relative paths inside a FOFN have already
+// been resolved by the time we get here).
+DataSet::DataSet(const string& filename)
+    : d_(DataSetIO::FromUri(filename))
+    , path_(FileUtils::DirectoryName(filename))
+{
+    const bool isFofn = boost::algorithm::iends_with(filename, ".fofn");
+    const bool useCwd = isFofn || boost::algorithm::iends_with(filename, ".bam");
+    if (useCwd)
+        path_ = FileUtils::CurrentWorkingDirectory();
+
+    InitDefaults(*this);
+}
+
+// Creates a dataset from several input files (see DataSetIO::FromUris).
+// The current working directory is used as the starting path.
+DataSet::DataSet(const vector<string>& filenames)
+    : d_(DataSetIO::FromUris(filenames)),
+      path_(FileUtils::CurrentWorkingDirectory())
+{
+    InitDefaults(*this);
+}
+
+// Copy constructor.
+//
+// The underlying dataset is duplicated through DataSetBase::DeepCopy(), which
+// also carries over the namespace registry. A source that holds no dataset
+// (e.g. one that has been moved from) yields a copy that holds none either,
+// rather than dereferencing a null pointer.
+DataSet::DataSet(const DataSet& other)
+    : path_(other.path_)
+{
+    if (other.d_)
+        d_.reset(other.d_->DeepCopy());
+}
+
+// Move constructor: takes over the other object's dataset and path.
+DataSet::DataSet(DataSet&& other)
+    : d_(std::move(other.d_)),
+      path_(std::move(other.path_))
+{
+    // the source must have given up its dataset
+    assert(!other.d_);
+}
+
+// Copy assignment.
+//
+// Self-assignment is a no-op. Otherwise the underlying dataset is duplicated
+// through DataSetBase::DeepCopy(), which also carries over the namespace
+// registry. If 'other' holds no dataset (e.g. it has been moved from), this
+// object ends up holding none either, rather than dereferencing a null pointer.
+DataSet& DataSet::operator=(const DataSet& other)
+{
+    if (this != &other) {
+        if (other.d_)
+            d_.reset(other.d_->DeepCopy());
+        else
+            d_.reset();
+        path_ = other.path_;
+    }
+    return *this;
+}
+
+// Move assignment: takes over the other object's dataset, then its path.
+DataSet& DataSet::operator=(DataSet&& other)
+{
+    d_    = std::move(other.d_);
+    path_ = std::move(other.path_);
+
+    return *this;
+}
+
+// Destructor: no explicit cleanup is performed here.
+DataSet::~DataSet(void)
+{
+}
+
+// Merges the contents of 'other' into this dataset by forwarding to
+// DataSetBase::operator+= on the underlying datasets.
+DataSet& DataSet::operator+=(const DataSet& other)
+{
+    DataSetBase& mine = *d_;
+    const DataSetBase& theirs = *other.d_;
+    mine += theirs;
+    return *this;
+}
+
+// Returns a BamFile for every external resource whose MetaType contains "bam"
+// (case-insensitive). Resource IDs are resolved against this dataset's path
+// before the file is opened.
+vector<BamFile> DataSet::BamFiles(void) const
+{
+    const PacBio::BAM::ExternalResources& resources = ExternalResources();
+
+    vector<BamFile> bamFiles;
+    bamFiles.reserve(resources.Size());
+    for (const ExternalResource& ext : resources) {
+
+        // skip non-BAM resources without bothering to resolve their paths
+        if (!boost::algorithm::icontains(ext.MetaType(), "bam"))
+            continue;
+
+        bamFiles.push_back(BamFile(ResolvePath(ext.ResourceId())));
+    }
+    return bamFiles;
+}
+
+// Builds a dataset from XML text held in memory. Default 'CreatedAt' &
+// 'Version' attributes are applied if the XML did not provide them.
+DataSet DataSet::FromXml(const string& xml)
+{
+    DataSet parsed;
+    parsed.d_ = internal::DataSetIO::FromXmlString(xml);
+    InitDefaults(parsed);
+    return parsed;
+}
+
+// Read-only access to the namespace registry of the underlying dataset.
+const NamespaceRegistry& DataSet::Namespaces(void) const
+{
+    return d_->Namespaces();
+}
+
+// Mutable access to the namespace registry of the underlying dataset.
+NamespaceRegistry& DataSet::Namespaces(void)
+{
+    return d_->Namespaces();
+}
+
+// Converts a dataset type name (e.g. "SubreadSet") to its DataSet::TypeEnum.
+// Throws std::out_of_range if the name is not a known dataset type.
+DataSet::TypeEnum DataSet::NameToType(const string& typeName)
+{
+    // The table is const and fully built by its initializer, so it is never
+    // modified after construction. (Filling a mutable static map on first use
+    // could race if the first calls arrived from several threads at once.)
+    static const std::unordered_map<std::string, DataSet::TypeEnum> lookup =
+    {
+        { "DataSet",               DataSet::GENERIC },
+        { "AlignmentSet",          DataSet::ALIGNMENT },
+        { "BarcodeSet",            DataSet::BARCODE },
+        { "ConsensusAlignmentSet", DataSet::CONSENSUS_ALIGNMENT },
+        { "ConsensusReadSet",      DataSet::CONSENSUS_READ },
+        { "ContigSet",             DataSet::CONTIG },
+        { "HdfSubreadSet",         DataSet::HDF_SUBREAD },
+        { "ReferenceSet",          DataSet::REFERENCE },
+        { "SubreadSet",            DataSet::SUBREAD }
+    };
+    return lookup.at(typeName); // throws if unknown typename
+}
+
+// Returns the ResourceId of every external resource, each one passed through
+// ResolvePath(). The order follows the order of the external resources.
+vector<string> DataSet::ResolvedResourceIds(void) const
+{
+    const PacBio::BAM::ExternalResources& resources = ExternalResources();
+
+    vector<string> resolved;
+    resolved.reserve(resources.Size());
+    for (const ExternalResource& ext : resources) {
+        const string& id = ext.ResourceId();
+        resolved.push_back(ResolvePath(id));
+    }
+    return resolved;
+}
+
+// Resolves 'originalPath' against this dataset's starting path, using
+// FileUtils::ResolvedFilePath().
+string DataSet::ResolvePath(const string& originalPath) const
+{
+    return internal::FileUtils::ResolvedFilePath(originalPath, path_);
+}
+
+// Writes this dataset to 'outputFilename' via DataSetIO::ToFile().
+void DataSet::Save(const std::string& outputFilename)
+{
+    DataSetIO::ToFile(d_, outputFilename);
+}
+
+// Writes this dataset to the output stream via DataSetIO::ToStream().
+void DataSet::SaveToStream(ostream& out)
+{
+    DataSetIO::ToStream(d_, out);
+}
+
+// Collects the distinct sequencing chemistries reported by the read groups of
+// every BAM file in this dataset.
+// Throws std::runtime_error if any of the BAM files is not a PacBio BAM.
+set<string> DataSet::SequencingChemistries(void) const
+{
+    set<string> chemistries;
+    for (const BamFile& bam : BamFiles()) {
+        if (!bam.IsPacBioBAM())
+            throw std::runtime_error{ "only PacBio BAMs are supported" };
+
+        // keep a local copy of the read groups while we iterate over them
+        const vector<ReadGroupInfo> groups{ bam.Header().ReadGroups() };
+        for (const ReadGroupInfo& group : groups)
+            chemistries.insert(group.SequencingChemistry());
+    }
+    return chemistries;
+}
+
+// Converts a DataSet::TypeEnum to its dataset type name (e.g. "SubreadSet").
+// Throws std::runtime_error for an unrecognized type.
+string DataSet::TypeToName(const DataSet::TypeEnum& type)
+{
+    const char* name = nullptr;
+    switch(type) {
+        case DataSet::GENERIC             : name = "DataSet";               break;
+        case DataSet::ALIGNMENT           : name = "AlignmentSet";          break;
+        case DataSet::BARCODE             : name = "BarcodeSet";            break;
+        case DataSet::CONSENSUS_ALIGNMENT : name = "ConsensusAlignmentSet"; break;
+        case DataSet::CONSENSUS_READ      : name = "ConsensusReadSet";      break;
+        case DataSet::CONTIG              : name = "ContigSet";             break;
+        case DataSet::HDF_SUBREAD         : name = "HdfSubreadSet";         break;
+        case DataSet::REFERENCE           : name = "ReferenceSet";          break;
+        case DataSet::SUBREAD             : name = "SubreadSet";            break;
+        default                           : break;
+    }
+
+    if (name == nullptr)
+        throw std::runtime_error("unsupported dataset type"); // unknown type
+    return name;
+}
+
+// Exposed timestamp utils
+
+namespace PacBio {
+namespace BAM {
+
+// Current time, rendered by internal::ToDataSetFormat().
+string CurrentTimestamp(void)
+{
+    const auto now = internal::CurrentTime();
+    return internal::ToDataSetFormat(now);
+}
+
+// Public wrapper around internal::ToDataSetFormat() for a time_point.
+string ToDataSetFormat(const chrono::system_clock::time_point &tp)
+{
+    return internal::ToDataSetFormat(tp);
+}
+
+// time_t overload: converts to a system_clock time_point, then formats it.
+string ToDataSetFormat(const time_t &t)
+{
+    const chrono::system_clock::time_point tp = chrono::system_clock::from_time_t(t);
+    return ToDataSetFormat(tp);
+}
+
+// Public wrapper around internal::ToIso8601() for a time_point.
+string ToIso8601(const chrono::system_clock::time_point &tp)
+{
+    return internal::ToIso8601(tp);
+}
+
+// time_t overload: converts to a system_clock time_point, then formats it.
+string ToIso8601(const time_t &t)
+{
+    const chrono::system_clock::time_point tp = chrono::system_clock::from_time_t(t);
+    return ToIso8601(tp);
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "pbbam/DataSetTypes.h"
+#include "pbbam/internal/DataSetBaseTypes.h"
+#include "DataSetUtils.h"
+#include "TimeUtils.h"
+#include <boost/algorithm/string.hpp>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// ----------------
+// BaseEntityType
+// ----------------
+
+// Constructs the element with the given label & XSD type, then assigns
+// internal::XML_VERSION as its Version if no version is present.
+BaseEntityType::BaseEntityType(const std::string& label, const XsdType& xsd)
+    : DataSetElement(label, xsd)
+{
+    const bool versionMissing = Version().empty();
+    if (versionMissing)
+        Version(internal::XML_VERSION);
+}
+
+// Extensions() accessors come from the DEFINE_ACCESSORS macro
+// (presumably the const & non-const getters - see its definition to confirm).
+DEFINE_ACCESSORS(BaseEntityType, Extensions, Extensions)
+
+// Setter: replaces the current extensions and returns *this for chaining.
+BaseEntityType& BaseEntityType::Extensions(const PacBio::BAM::Extensions& extensions)
+{
+    Extensions() = extensions;
+    return *this;
+}
+
+// ----------------
+// DataEntityType
+// ----------------
+
+// Forwards label & XSD type straight to BaseEntityType; adds nothing itself.
+DataEntityType::DataEntityType(const std::string& label, const XsdType& xsd)
+    : BaseEntityType(label, xsd)
+{
+}
+
+// -----------------
+// IndexedDataType
+// -----------------
+
+// Forwards all arguments to InputOutputDataType; adds nothing itself.
+IndexedDataType::IndexedDataType(const string& metatype,
+                                 const string& filename,
+                                 const string& label,
+                                 const XsdType &xsd)
+    : InputOutputDataType(metatype, filename, label, xsd)
+{
+}
+
+// FileIndices() accessors come from the DEFINE_ACCESSORS macro
+// (presumably the const & non-const getters - see its definition to confirm).
+DEFINE_ACCESSORS(IndexedDataType, FileIndices, FileIndices)
+
+// Setter: replaces the current file indices and returns *this for chaining.
+IndexedDataType& IndexedDataType::FileIndices(const PacBio::BAM::FileIndices& indices)
+{
+    FileIndices() = indices;
+    return *this;
+}
+
+// ---------------------
+// InputOutputDataType
+// ---------------------
+
+// Builds the StrictEntityType base from metatype/label/xsd, then records
+// 'filename' as this element's ResourceId.
+InputOutputDataType::InputOutputDataType(const string& metatype,
+                                         const string& filename,
+                                         const string& label,
+                                         const XsdType &xsd)
+    : StrictEntityType(metatype, label, xsd)
+{
+    ResourceId(filename);
+}
+
+// ----------------
+// StrictEntityType
+// ----------------
+
+// Constructs the element and populates its MetaType, TimeStampedName and
+// UniqueId attributes.
+//
+// TimeStampedName is the metatype, lower-cased and with every '.' replaced by
+// '_', followed by '-' and the current time in dataset format.
+// UniqueId is a freshly generated UUID.
+StrictEntityType::StrictEntityType(const string& metatype,
+                                   const string& label,
+                                   const XsdType& xsd)
+    : BaseEntityType(label, xsd)
+{
+    // MetaType
+    MetaType(metatype);
+
+    // TimeStampedName
+    string transformedMetatype;
+    transformedMetatype.reserve(metatype.size());
+    for (const char c : metatype) {
+        if (c == '.') {
+            transformedMetatype.push_back('_');
+        } else {
+            // tolower() is only defined for values representable as unsigned
+            // char (or EOF); go through unsigned char so a negative char value
+            // cannot trigger undefined behavior.
+            const int lowered = tolower(static_cast<unsigned char>(c));
+            transformedMetatype.push_back(static_cast<char>(lowered));
+        }
+    }
+    const string tsn = transformedMetatype + "-" + internal::ToDataSetFormat(internal::CurrentTime());
+    TimeStampedName(tsn);
+
+    // UniqueId
+    UniqueId(internal::GenerateUuid());
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "pbbam/internal/DataSetElement.h"
+#include "DataSetUtils.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+
+// Returns the std::string instance supplied by internal::NullObject<>().
+const std::string& DataSetElement::SharedNullString(void)
+{ return internal::NullObject<std::string>(); }
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "DataSetIO.h"
+#include "FileUtils.h"
+#include "FofnReader.h"
+#include "StringUtils.h"
+#include "XmlReader.h"
+#include "XmlWriter.h"
+#include <boost/algorithm/string.hpp>
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <cassert>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+typedef std::shared_ptr<DataSetBase> DataSetPtr;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Loads a dataset from an XML file on disk.
+// Throws std::runtime_error if the file cannot be opened.
+static
+unique_ptr<DataSetBase> FromXml(const string& xmlFn)
+{
+    ifstream xmlStream(xmlFn);
+    if (!xmlStream)
+        throw std::runtime_error("could not open XML file for reading");
+
+    return XmlReader::FromStream(xmlStream);
+}
+
+// Wraps a single BAM file in a new SubreadSet: the BAM becomes the set's
+// only external resource.
+static
+unique_ptr<DataSetBase> FromBam(const string& bamFn)
+{
+    unique_ptr<DataSetBase> subreadSet(new SubreadSet);
+    subreadSet->ExternalResources().Add(ExternalResource(BamFile(bamFn)));
+    return subreadSet;
+}
+
+// Loads a dataset from a FOFN (file of filenames).
+//
+// Each listed filename is resolved against the directory containing the FOFN,
+// and the resulting list is handed to DataSetIO::FromUris().
+// Throws std::runtime_error if the FOFN cannot be opened.
+static
+unique_ptr<DataSetBase> FromFofn(const string& fofn)
+{
+    const string fofnDir = internal::FileUtils::DirectoryName(fofn);
+    ifstream fofnStream(fofn);
+    if (!fofnStream)
+        throw std::runtime_error("could not open FOFN for reading");
+
+    vector<string> filenames = FofnReader::Files(fofnStream);
+    for (string& fn : filenames)
+        fn = internal::FileUtils::ResolvedFilePath(fn, fofnDir);
+    return DataSetIO::FromUris(filenames);
+}
+
+// Loads a dataset from one input, picking the loader by filename extension
+// (case-insensitive): ".xml", ".bam" or ".fofn".
+//
+// NOTE: despite the name, the input is not handled as a true URI - it is
+//       simply treated as a regular filename for now.
+//
+// Throws std::runtime_error for any other extension.
+static
+unique_ptr<DataSetBase> FromUri(const string& uri)
+{
+    if (boost::algorithm::iends_with(uri, ".xml"))
+        return FromXml(uri);
+
+    if (boost::algorithm::iends_with(uri, ".bam"))
+        return FromBam(uri);
+
+    if (boost::algorithm::iends_with(uri, ".fofn"))
+        return FromFofn(uri);
+
+    // unknown filename extension
+    throw std::runtime_error("unsupported input file extension");
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// Single-input convenience: wraps 'uri' in a one-element list and defers to
+// FromUris().
+std::unique_ptr<DataSetBase> DataSetIO::FromUri(const std::string& uri)
+{
+    const vector<string> single(1, uri);
+    return FromUris(single);
+}
+
+// Loads one dataset per input and merges them into a single dataset.
+//
+// With a single input, that dataset is returned as-is. With several, every
+// subsequent dataset is merged (operator+=) into the first, in input order.
+// Throws std::runtime_error if 'uris' is empty.
+std::unique_ptr<DataSetBase> DataSetIO::FromUris(const std::vector<std::string>& uris)
+{
+    if (uris.empty())
+        throw std::runtime_error("empty input URI list"); // or just return empty, generic DataSet?
+
+    // create dataset(s) from URI(s)
+    vector< unique_ptr<DataSetBase> > loaded;
+    loaded.reserve(uris.size());
+    for (const string& uri : uris)
+        loaded.push_back(internal::FromUri(uri));
+    assert(!loaded.empty());
+
+    // take ownership of the first dataset, then fold any others into it
+    // (the loop body never runs when there is only one)
+    unique_ptr<DataSetBase> merged(loaded.front().release());
+    for (size_t i = 1; i < loaded.size(); ++i)
+        *merged += *loaded[i];
+    return merged;
+}
+
+// Parses a dataset from XML text held in memory.
+// Throws std::runtime_error if the text is empty.
+std::unique_ptr<DataSetBase> DataSetIO::FromXmlString(const string& xml)
+{
+    if (xml.empty())
+        throw std::runtime_error("empty XML string");
+
+    // expose the text as a stream for the XML reader
+    stringstream xmlStream(xml);
+    return XmlReader::FromStream(xmlStream);
+}
+
+// Writes the dataset as XML to the file 'fn'.
+// Throws std::runtime_error if the file cannot be opened for writing.
+void DataSetIO::ToFile(const std::unique_ptr<DataSetBase>& dataset,
+                       const string& fn)
+{
+    ofstream xmlOut(fn);
+    if (!xmlOut)
+        throw std::runtime_error("could not open XML for writing");
+
+    XmlWriter::ToStream(dataset, xmlOut);
+}
+
+// Writes the dataset to an already-open output stream via XmlWriter.
+void DataSetIO::ToStream(const std::unique_ptr<DataSetBase>& dataset, ostream &out)
+{
+    XmlWriter::ToStream(dataset, out);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef DATASETIO_H
+#define DATASETIO_H
+
+#include "pbbam/DataSet.h"
+#include <iosfwd>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Internal helper for reading & writing dataset contents.
+// All operations are static.
+class DataSetIO
+{
+public:
+
+    // input
+
+    // Builds a dataset from one input filename, or from a list of them.
+    static std::unique_ptr<DataSetBase> FromUri(const std::string& uri);
+    static std::unique_ptr<DataSetBase> FromUris(const std::vector<std::string>& uris);
+
+    // Builds a dataset from XML text held in memory.
+    static std::unique_ptr<DataSetBase> FromXmlString(const std::string& xml);
+
+    // output
+
+    // Writes the dataset to the named file, or to an already-open stream.
+    static void ToFile(const std::unique_ptr<DataSetBase>& dataset, const std::string& fn);
+    static void ToStream(const std::unique_ptr<DataSetBase>& dataset, std::ostream& out);
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // DATASETIO_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file DataSetTypes.cpp
+/// \brief Implementations for the public DataSet component classes.
+//
+// Author: Derek Barnett
+
+#include "pbbam/DataSetTypes.h"
+#include "pbbam/internal/DataSetBaseTypes.h"
+#include "DataSetUtils.h"
+#include "FileUtils.h"
+#include "TimeUtils.h"
+#include <set>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// -------------------
+// AlignmentSet
+// -------------------
+
+// Default constructor: fixes the metatype & element label for this dataset type.
+AlignmentSet::AlignmentSet(void)
+    : DataSetBase("PacBio.DataSet.AlignmentSet", "AlignmentSet", XsdType::DATASETS)
+{
+}
+
+// -------------------
+// BarcodeSet
+// -------------------
+
+// Default constructor: fixes the metatype & element label for this dataset type.
+BarcodeSet::BarcodeSet(void)
+    : DataSetBase("PacBio.DataSet.BarcodeSet", "BarcodeSet", XsdType::DATASETS)
+{
+}
+
+// -----------------------
+// ConsensusAlignmentSet
+// -----------------------
+
+// Default constructor: fixes the metatype & element label for this dataset type.
+ConsensusAlignmentSet::ConsensusAlignmentSet(void)
+    : DataSetBase("PacBio.DataSet.ConsensusAlignmentSet", "ConsensusAlignmentSet", XsdType::DATASETS)
+{
+}
+
+// -------------------
+// ConsensusReadSet
+// -------------------
+
+// Default constructor: fixes the metatype & element label for this dataset type.
+ConsensusReadSet::ConsensusReadSet(void)
+    : DataSetBase("PacBio.DataSet.ConsensusReadSet", "ConsensusReadSet", XsdType::DATASETS)
+{
+}
+
+// -------------------
+// ContigSet
+// -------------------
+
+// Default constructor: fixes the metatype & element label for this dataset type.
+ContigSet::ContigSet(void)
+    : DataSetBase("PacBio.DataSet.ContigSet", "ContigSet", XsdType::DATASETS)
+{
+}
+
+// -------------------
+// DataSetBase
+// -------------------
+
+// Default constructor: a generic dataset ("DataSet" element).
+DataSetBase::DataSetBase(void)
+    : StrictEntityType("PacBio.DataSet.DataSet", "DataSet", XsdType::DATASETS)
+{
+}
+
+// Used by the concrete dataset types: forwards their metatype, element label
+// and XSD type to StrictEntityType.
+DataSetBase::DataSetBase(const string& metatype,
+                         const string& label,
+                         const XsdType& xsd)
+    : StrictEntityType(metatype, label, xsd)
+{
+}
+
+// ExternalResources() accessors come from the DEFINE_ACCESSORS macro
+// (presumably the const & non-const getters - see its definition to confirm).
+DEFINE_ACCESSORS(DataSetBase, ExternalResources, ExternalResources)
+
+// Setter: replaces the current external resources and returns *this for chaining.
+DataSetBase& DataSetBase::ExternalResources(const PacBio::BAM::ExternalResources& resources)
+{
+    ExternalResources() = resources;
+    return *this;
+}
+
+// Filters() accessors come from the DEFINE_ACCESSORS macro
+// (presumably the const & non-const getters - see its definition to confirm).
+DEFINE_ACCESSORS(DataSetBase, Filters, Filters)
+
+// Setter: replaces the current filters and returns *this for chaining.
+DataSetBase& DataSetBase::Filters(const PacBio::BAM::Filters& filters)
+{
+    Filters() = filters;
+    return *this;
+}
+
+// Metadata() accessors (typed as DataSetMetadata) come from the
+// DEFINE_ACCESSORS macro (presumably the const & non-const getters - see its
+// definition to confirm).
+DEFINE_ACCESSORS(DataSetBase, DataSetMetadata, Metadata)
+
+// Setter: replaces the current metadata and returns *this for chaining.
+DataSetBase& DataSetBase::Metadata(const PacBio::BAM::DataSetMetadata& metadata)
+{
+    Metadata() = metadata;
+    return *this;
+}
+
+// Read-only access to the "DataSets" child element.
+//
+// If fetching the child throws any std::exception, the object provided by
+// internal::NullObject<SubDataSets>() is returned instead, so this accessor
+// itself does not propagate that failure.
+const PacBio::BAM::SubDataSets& DataSetBase::SubDataSets(void) const
+{
+    try {
+        return Child<PacBio::BAM::SubDataSets>("DataSets");
+    } catch (std::exception&) {
+        // could not fetch the child - fall back to the null object
+        return internal::NullObject<PacBio::BAM::SubDataSets>();
+    }
+}
+
+// Mutable access to the "DataSets" child element. If this element has no such
+// child yet, one is added first (from internal::NullObject<SubDataSets>()).
+PacBio::BAM::SubDataSets& DataSetBase::SubDataSets(void)
+{
+    const bool alreadyPresent = HasChild("DataSets");
+    if (!alreadyPresent)
+        AddChild(internal::NullObject<PacBio::BAM::SubDataSets>());
+
+    return Child<PacBio::BAM::SubDataSets>("DataSets");
+}
+
+// Setter: replaces the current sub-datasets and returns *this for chaining.
+DataSetBase& DataSetBase::SubDataSets(const PacBio::BAM::SubDataSets &subdatasets)
+{
+    SubDataSets() = subdatasets;
+    return *this;
+}
+
+// Returns a newly allocated copy of this dataset, including its registry_.
+// Ownership of the returned pointer passes to the caller.
+//
+// The copy is created as a genuine DataSetBase. Allocating a DataSetElement
+// and static_cast-ing it down to DataSetBase* would be undefined behavior:
+// the object would not actually be a DataSetBase, so touching registry_
+// (or destroying it through a DataSetBase*) would access an object of the
+// wrong type.
+DataSetBase* DataSetBase::DeepCopy(void) const
+{
+    return new DataSetBase(*this);
+}
+
+/// \brief Merges \p other into this dataset.
+///
+/// Metadata, external resources and filters are merged via their own
+/// operator+=, and \p other itself is appended to this dataset's SubDataSets.
+///
+/// \param[in] other  dataset to merge in; its label must equal this dataset's
+///                   label, or be the generic "DataSet"
+/// \returns reference to this dataset
+/// \throws std::runtime_error if the dataset types are incompatible
+DataSetBase& DataSetBase::operator+=(const DataSetBase& other)
+{
+    // must be same dataset types (or 'other' must be generic)
+    const auto& otherLabel = other.LocalNameLabel();
+    if (otherLabel != LocalNameLabel())
+    {
+        if (otherLabel != "DataSet")
+            throw std::runtime_error("cannot merge incompatible dataset types");
+    }
+
+    // check filter match
+    // check object metadata
+    PacBio::BAM::DataSetMetadata& metadata = Metadata();
+    metadata += other.Metadata();
+
+    PacBio::BAM::ExternalResources& resources = ExternalResources();
+    resources += other.ExternalResources();
+
+    PacBio::BAM::Filters& filters = Filters();
+    filters += other.Filters();
+
+    PacBio::BAM::SubDataSets& subDataSets = SubDataSets();
+    subDataSets += other;
+
+    return *this;
+}
+
+/// \brief Factory: creates a default-constructed dataset of the named type.
+///
+/// \param[in] typeName  one of "DataSet", "SubreadSet", "AlignmentSet",
+///                      "BarcodeSet", "ConsensusAlignmentSet",
+///                      "ConsensusReadSet", "ContigSet", "HdfSubreadSet",
+///                      "ReferenceSet" (exact, case-sensitive match)
+/// \returns shared pointer to the new dataset
+/// \throws std::runtime_error if \p typeName is none of the names above
+std::shared_ptr<DataSetBase> DataSetBase::Create(const string& typeName)
+{
+    if (typeName == "DataSet")
+        return make_shared<DataSetBase>();
+    if (typeName == "SubreadSet")
+        return make_shared<SubreadSet>();
+    if (typeName == "AlignmentSet")
+        return make_shared<AlignmentSet>();
+    if (typeName == "BarcodeSet")
+        return make_shared<BarcodeSet>();
+    if (typeName == "ConsensusAlignmentSet")
+        return make_shared<ConsensusAlignmentSet>();
+    if (typeName == "ConsensusReadSet")
+        return make_shared<ConsensusReadSet>();
+    if (typeName == "ContigSet")
+        return make_shared<ContigSet>();
+    if (typeName == "HdfSubreadSet")
+        return make_shared<HdfSubreadSet>();
+    if (typeName == "ReferenceSet")
+        return make_shared<ReferenceSet>();
+
+    // unknown typename
+    throw std::runtime_error("unsupported dataset type");
+}
+
+// -------------------
+// DataSetMetadata
+// -------------------
+
+/// \brief Constructs a "DataSetMetadata" element (DATASETS XSD) and stores the
+///        two supplied values via NumRecords() and TotalLength().
+///
+/// \param[in] numRecords   value passed to NumRecords()
+/// \param[in] totalLength  value passed to TotalLength()
+DataSetMetadata::DataSetMetadata(const std::string& numRecords, const std::string& totalLength)
+    : DataSetElement("DataSetMetadata", XsdType::DATASETS)
+{
+    NumRecords(numRecords);
+    TotalLength(totalLength);
+}
+
+// Provenance child: const/non-const accessors plus a fluent setter
+DEFINE_ACCESSORS(DataSetMetadata, Provenance, Provenance)
+
+/// \brief Replaces this element's Provenance child with \p provenance.
+/// \returns reference to this element, so calls can be chained
+DataSetMetadata& DataSetMetadata::Provenance(const PacBio::BAM::Provenance& provenance)
+{
+    PacBio::BAM::Provenance& current = Provenance();
+    current = provenance;
+    return *this;
+}
+
+// Adds two counts that are stored as decimal text and returns the sum as text.
+// An empty operand contributes nothing (the other operand is returned as-is).
+// Throws std::runtime_error if a non-empty operand is not an unsigned integer.
+static std::string SumMetadataCounts(const std::string& lhs, const std::string& rhs)
+{
+    if (lhs.empty())
+        return rhs;
+    if (rhs.empty())
+        return lhs;
+
+    unsigned long long left = 0;
+    unsigned long long right = 0;
+    size_t leftParsed = 0;
+    size_t rightParsed = 0;
+    try {
+        left = std::stoull(lhs, &leftParsed);
+        right = std::stoull(rhs, &rightParsed);
+    } catch (const std::exception&) {
+        throw std::runtime_error("cannot merge non-numeric dataset metadata values");
+    }
+
+    // reject trailing garbage such as "12abc"
+    if (leftParsed != lhs.size() || rightParsed != rhs.size())
+        throw std::runtime_error("cannot merge non-numeric dataset metadata values");
+
+    return std::to_string(left + right);
+}
+
+/// \brief Merges \p other into this metadata by summing NumRecords and
+///        TotalLength.
+///
+/// Both values are held as text (the constructor takes strings), so applying
+/// operator+ to them directly concatenated the digits ("10" + "20" became
+/// "1020"). They are now added numerically. An empty value on either side
+/// leaves the other side's value unchanged.
+///
+/// \returns reference to this metadata element
+/// \throws std::runtime_error if a non-empty value is not an unsigned integer
+DataSetMetadata& DataSetMetadata::operator+=(const DataSetMetadata& other)
+{
+    // compute both sums before assigning, so a bad value leaves *this untouched
+    const std::string mergedNumRecords = SumMetadataCounts(NumRecords(), other.NumRecords());
+    const std::string mergedTotalLength = SumMetadataCounts(TotalLength(), other.TotalLength());
+
+    NumRecords() = mergedNumRecords;
+    TotalLength() = mergedTotalLength;
+    // merge add'l
+    return *this;
+}
+
+// -------------------
+// ExtensionElement
+// -------------------
+
+/// \brief Constructs an empty "ExtensionElement" in the BASE_DATA_MODEL XSD.
+ExtensionElement::ExtensionElement(void)
+    : DataSetElement("ExtensionElement", XsdType::BASE_DATA_MODEL)
+{
+}
+
+// -------------------
+// Extensions
+// -------------------
+
+/// \brief Constructs an empty "Extensions" list (of ExtensionElement) in the
+///        BASE_DATA_MODEL XSD.
+Extensions::Extensions(void)
+    : DataSetListElement<ExtensionElement>("Extensions", XsdType::BASE_DATA_MODEL)
+{
+}
+
+// -------------------
+// ExternalResource
+// -------------------
+
+/// \brief Constructs an "ExternalResource" element for a BAM file, using the
+///        "PacBio.SubreadFile.SubreadBamFile" metatype and the file's
+///        Filename().
+///
+/// \param[in] bamFile  BAM file whose filename is stored in the element
+ExternalResource::ExternalResource(const BamFile& bamFile)
+    : IndexedDataType("PacBio.SubreadFile.SubreadBamFile", bamFile.Filename(), "ExternalResource",
+                      XsdType::BASE_DATA_MODEL)
+{
+}
+
+/// \brief Constructs an "ExternalResource" element from an explicit metatype
+///        and filename.
+///
+/// \param[in] metatype  metatype string forwarded to IndexedDataType
+/// \param[in] filename  filename forwarded to IndexedDataType
+ExternalResource::ExternalResource(const string& metatype, const string& filename)
+    : IndexedDataType(metatype, filename, "ExternalResource", XsdType::BASE_DATA_MODEL)
+{
+}
+
+// nested ExternalResources child: const/non-const accessors plus a fluent setter
+DEFINE_ACCESSORS(ExternalResource, ExternalResources, ExternalResources)
+
+/// \brief Replaces this resource's nested ExternalResources child with
+///        \p resources.
+/// \returns reference to this resource, so calls can be chained
+ExternalResource& ExternalResource::ExternalResources(const PacBio::BAM::ExternalResources& resources)
+{
+    PacBio::BAM::ExternalResources& current = ExternalResources();
+    current = resources;
+    return *this;
+}
+
+/// \brief Builds a BamFile from this resource's ResourceId().
+BamFile ExternalResource::ToBamFile(void) const
+{
+    return BamFile(ResourceId());
+}
+
+// -------------------
+// ExternalResources
+// -------------------
+
+/// \brief Constructs an empty "ExternalResources" list (of ExternalResource)
+///        in the BASE_DATA_MODEL XSD.
+ExternalResources::ExternalResources(void)
+    : DataSetListElement<ExternalResource>("ExternalResources", XsdType::BASE_DATA_MODEL)
+{
+}
+
+/// \brief Appends the resources of \p other whose ResourceId is not already
+///        present in this list.
+///
+/// Uniqueness is by ResourceId only. Duplicates inside \p other are also
+/// collapsed: the first occurrence is kept. Appending a list to itself adds
+/// nothing.
+///
+/// \returns reference to this list
+ExternalResources& ExternalResources::operator+=(const ExternalResources& other)
+{
+    // Collect the ids we already hold once. Previously each appended element
+    // went through Add(), which rebuilt this whole set every time.
+    std::set<std::string> knownIds;
+    for (size_t i = 0; i < Size(); ++i) {
+        const ExternalResource& resource = this->operator[](i);
+        knownIds.insert(resource.ResourceId());
+    }
+
+    // Size is captured up front so the loop bound cannot change while we append.
+    const size_t numOtherResources = other.Size();
+    for (size_t i = 0; i < numOtherResources; ++i) {
+        const ExternalResource& candidate = other[i];
+        // insert() reports whether the id was new; only then keep the resource
+        if (knownIds.insert(candidate.ResourceId()).second)
+            AddChild(candidate);
+    }
+
+    return *this;
+}
+
+/// \brief Adds \p ext to the list unless a resource with the same ResourceId
+///        is already present, in which case the call is a no-op.
+void ExternalResources::Add(const ExternalResource& ext)
+{
+    // disallow external resources w/ duplicate ResourceIds
+    const auto& candidateId = ext.ResourceId();
+    const size_t numResources = Size();
+    for (size_t i = 0; i < numResources; ++i) {
+        const ExternalResource& existing = this->operator[](i);
+        if (existing.ResourceId() == candidateId)
+            return;
+    }
+    AddChild(ext);
+}
+
+/// \brief Converts every resource in this list to a BamFile via
+///        ExternalResource::ToBamFile().
+///
+/// \returns one BamFile per resource, in list order
+vector<BamFile> ExternalResources::BamFiles(void) const
+{
+    vector<BamFile> result;
+    // Pass the size straight through; it used to be squeezed into an int
+    // first, a signed/unsigned round trip that served no purpose.
+    result.reserve(Size());
+    for (const ExternalResource& ext : *this)
+        result.push_back(ext.ToBamFile());
+    return result;
+}
+
+/// \brief Removes \p ext from this list by delegating to RemoveChild().
+void ExternalResources::Remove(const ExternalResource& ext)
+{
+    RemoveChild(ext);
+}
+
+// -------------------
+// FileIndex
+// -------------------
+
+/// \brief Constructs a "FileIndex" element (BASE_DATA_MODEL XSD).
+///
+/// \param[in] metatype  metatype string forwarded to InputOutputDataType
+/// \param[in] filename  filename forwarded to InputOutputDataType
+FileIndex::FileIndex(const string& metatype, const string& filename)
+    : InputOutputDataType(metatype, filename, "FileIndex", XsdType::BASE_DATA_MODEL)
+{
+}
+
+// -------------------
+// FileIndices
+// -------------------
+
+/// \brief Constructs an empty "FileIndices" list (of FileIndex) in the
+///        BASE_DATA_MODEL XSD.
+FileIndices::FileIndices(void)
+    : DataSetListElement<FileIndex>("FileIndices", XsdType::BASE_DATA_MODEL)
+{
+}
+
+/// \brief Appends \p index to the list (no duplicate check).
+void FileIndices::Add(const FileIndex& index)
+{
+    AddChild(index);
+}
+
+/// \brief Removes \p index from the list by delegating to RemoveChild().
+void FileIndices::Remove(const FileIndex& index)
+{
+    RemoveChild(index);
+}
+
+// -------------------
+// Filter
+// -------------------
+
+/// \brief Constructs an empty "Filter" element in the DATASETS XSD.
+Filter::Filter(void)
+    : DataSetElement("Filter", XsdType::DATASETS)
+{
+}
+
+// Properties child: const/non-const accessors plus a fluent setter
+DEFINE_ACCESSORS(Filter, Properties, Properties)
+
+/// \brief Replaces this filter's Properties child with \p properties.
+/// \returns reference to this filter, so calls can be chained
+Filter& Filter::Properties(const PacBio::BAM::Properties& properties)
+{
+    PacBio::BAM::Properties& current = Properties();
+    current = properties;
+    return *this;
+}
+
+// -------------------
+// Filters
+// -------------------
+
+/// \brief Constructs an empty "Filters" list (of Filter) in the DATASETS XSD.
+Filters::Filters(void)
+    : DataSetListElement<Filter>("Filters", XsdType::DATASETS)
+{
+}
+
+/// \brief Appends every filter of \p other to this list (no de-duplication).
+/// \returns reference to this list
+Filters& Filters::operator+=(const Filters& other)
+{
+    for (const auto& incoming : other)
+    {
+        AddChild(incoming);
+    }
+    return *this;
+}
+
+/// \brief Appends \p filter to the list.
+void Filters::Add(const Filter& filter)
+{
+    AddChild(filter);
+}
+
+/// \brief Removes \p filter from the list by delegating to RemoveChild().
+void Filters::Remove(const Filter& filter)
+{
+    RemoveChild(filter);
+}
+
+// -------------------
+// HdfSubreadSet
+// -------------------
+
+/// \brief Constructs an empty HdfSubreadSet: a DataSetBase carrying the
+///        "PacBio.DataSet.HdfSubreadSet" metatype and "HdfSubreadSet" label
+///        in the DATASETS XSD.
+HdfSubreadSet::HdfSubreadSet(void)
+    : DataSetBase("PacBio.DataSet.HdfSubreadSet", "HdfSubreadSet", XsdType::DATASETS)
+{
+}
+
+// -------------------
+// ParentTool
+// -------------------
+
+/// \brief Constructs an empty "ParentTool" element in the DATASETS XSD.
+ParentTool::ParentTool(void)
+    : BaseEntityType("ParentTool", XsdType::DATASETS)
+{
+}
+
+// -------------------
+// Properties
+// -------------------
+
+/// \brief Constructs an empty "Properties" list (of Property) in the
+///        BASE_DATA_MODEL XSD.
+Properties::Properties(void)
+    : DataSetListElement<Property>("Properties", XsdType::BASE_DATA_MODEL)
+{
+}
+
+/// \brief Appends \p property to the list.
+void Properties::Add(const Property &property)
+{
+    AddChild(property);
+}
+
+/// \brief Removes \p property from the list by delegating to RemoveChild().
+void Properties::Remove(const Property& property)
+{
+    RemoveChild(property);
+}
+
+// -------------------
+// Property
+// -------------------
+
+/// \brief Constructs a "Property" element (BASE_DATA_MODEL XSD) and stores the
+///        three supplied values via Name(), Value() and Operator().
+///
+/// \param[in] name   value passed to Name()
+/// \param[in] value  value passed to Value()
+/// \param[in] op     value passed to Operator()
+Property::Property(const std::string& name, const std::string& value, const std::string& op)
+    : DataSetElement("Property", XsdType::BASE_DATA_MODEL)
+{
+    // same call order as before: name, value, operator
+    Name(name);
+    Value(value);
+    Operator(op);
+}
+
+// -------------------
+// Provenance
+// -------------------
+
+/// \brief Constructs an empty "Provenance" element in the DATASETS XSD.
+Provenance::Provenance(void)
+    : DataSetElement("Provenance", XsdType::DATASETS)
+{
+}
+
+// ParentTool child: const/non-const accessors
+DEFINE_ACCESSORS(Provenance, ParentTool, ParentTool)
+
+// -------------------
+// ReferenceSet
+// -------------------
+
+/// \brief Constructs an empty ReferenceSet: a DataSetBase carrying the
+///        "PacBio.DataSet.ReferenceSet" metatype and "ReferenceSet" label in
+///        the DATASETS XSD.
+ReferenceSet::ReferenceSet(void)
+    : DataSetBase("PacBio.DataSet.ReferenceSet", "ReferenceSet", XsdType::DATASETS)
+{
+}
+
+// -------------------
+// SubDataSets
+// -------------------
+
+/// \brief Constructs an empty "DataSets" list (of DataSetBase) in the
+///        DATASETS XSD.
+SubDataSets::SubDataSets(void)
+    : internal::DataSetListElement<DataSetBase>("DataSets", XsdType::DATASETS)
+{
+}
+
+/// \brief Appends the single dataset \p other to this list.
+/// \returns reference to this list
+SubDataSets& SubDataSets::operator+=(const DataSetBase& other)
+{
+    AddChild(other);
+    return *this;
+}
+
+/// \brief Appends every dataset contained in \p other to this list
+///        (no de-duplication).
+/// \returns reference to this list
+SubDataSets& SubDataSets::operator+=(const SubDataSets& other)
+{
+    for (const auto& incoming : other)
+    {
+        AddChild(incoming);
+    }
+    return *this;
+}
+
+/// \brief Appends \p subdataset to the list.
+void SubDataSets::Add(const DataSetBase& subdataset)
+{
+    AddChild(subdataset);
+}
+
+/// \brief Removes \p subdataset from the list by delegating to RemoveChild().
+void SubDataSets::Remove(const DataSetBase& subdataset)
+{
+    RemoveChild(subdataset);
+}
+
+// -------------------
+// SubreadSet
+// -------------------
+
+/// \brief Constructs an empty SubreadSet: a DataSetBase carrying the
+///        "PacBio.DataSet.SubreadSet" metatype and "SubreadSet" label in the
+///        DATASETS XSD.
+SubreadSet::SubreadSet(void)
+    : DataSetBase("PacBio.DataSet.SubreadSet", "SubreadSet", XsdType::DATASETS)
+{
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef DATASETUTILS_H
+#define DATASETUTILS_H
+
+#include "pbbam/DataSetTypes.h"
+#include <boost/uuid/random_generator.hpp>
+#include <boost/uuid/uuid_io.hpp>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// Version string "3.0.1"; presumably the DataSet XML version this library
+/// writes - TODO confirm against the code that uses it.
+static const std::string XML_VERSION{ "3.0.1" };
+
+/// \brief Returns a reference to a single, default-constructed, immutable
+///        instance of T, shared by all callers.
+///
+/// The accessor macros in this header use it as the "no such child" result
+/// and as the template for newly added children.
+template<typename T>
+inline const T& NullObject(void)
+{
+    static const T sharedDefault;
+    return sharedDefault;
+}
+
+/// \brief NullObject specialization for DataSetMetadata: the shared instance
+///        is built from two empty strings, using the two-string constructor.
+template<>
+inline const PacBio::BAM::DataSetMetadata& NullObject(void)
+{
+    static const PacBio::BAM::DataSetMetadata sharedDefault("", "");
+    return sharedDefault;
+}
+
+/// \brief Generates a new random UUID and returns its string form.
+///
+/// One boost::uuids::random_generator is created on first use and reused by
+/// later calls.
+inline std::string GenerateUuid(void)
+{
+    static boost::uuids::random_generator generator;
+    const boost::uuids::uuid freshId = generator();
+    return boost::uuids::to_string(freshId);
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#ifndef FETCH_CHILD_CONST_REF
+// FETCH_CHILD_CONST_REF(Class, Type, Method)
+//
+// Defines the const accessor `const PacBio::BAM::Type& Class::Method() const`.
+// The child is looked up by the stringified Type name (#Type), so the element
+// label must match the C++ type name. If Child() throws a std::exception, the
+// shared internal::NullObject<Type>() is returned instead of propagating.
+#define FETCH_CHILD_CONST_REF(Class, Type, Method) \
+ \
+ const PacBio::BAM::Type& Class::Method(void) const \
+ { \
+ try { \
+ return Child<PacBio::BAM::Type>(#Type); \
+ } catch (std::exception&) { \
+ return internal::NullObject<PacBio::BAM::Type>(); \
+ } \
+ }
+#endif
+
+#ifndef FETCH_CHILD_REF
+// FETCH_CHILD_REF(Class, Type, Method)
+//
+// Defines the mutable accessor `PacBio::BAM::Type& Class::Method()`.
+// If no child labeled with the stringified Type name (#Type) exists yet,
+// internal::NullObject<Type>() is passed to AddChild() first, so the returned
+// reference always refers to a child of this element.
+#define FETCH_CHILD_REF(Class, Type, Method) \
+ \
+ PacBio::BAM::Type& Class::Method(void) \
+ { \
+ if (!HasChild(#Type)) \
+ AddChild(internal::NullObject<PacBio::BAM::Type>()); \
+ return Child<PacBio::BAM::Type>(#Type); \
+ }
+#endif
+
+#ifndef DEFINE_ACCESSORS
+// DEFINE_ACCESSORS(Class, Type, Method)
+//
+// Convenience wrapper: emits both the const accessor (FETCH_CHILD_CONST_REF)
+// and the mutable accessor (FETCH_CHILD_REF) for the same child element.
+#define DEFINE_ACCESSORS(Class, Type, Method) \
+ FETCH_CHILD_CONST_REF(Class, Type, Method) \
+ FETCH_CHILD_REF(Class, Type, Method)
+#endif
+
+#endif // DATASETUTILS_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file DataSetXsd.cpp
+/// \brief Implements the XSD- and namespace-related classes for DataSetXML.
+//
+// Author: Derek Barnett
+
+#include "pbbam/DataSetXsd.h"
+#include <unordered_map>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief Builds the default XsdType -> NamespaceInfo table used to
+///        initialize a NamespaceRegistry.
+///
+/// Each entry pairs a namespace name (empty where no prefix is assigned) with
+/// the XSD's URI. XsdType::NONE maps to an empty name and URI.
+///
+/// \returns a freshly built map; the caller owns it
+static map<XsdType, NamespaceInfo> DefaultRegistry(void)
+{
+    // Returned directly as a temporary: the former `const` local could not be
+    // moved from, so returning it copied the whole map.
+    return map<XsdType, NamespaceInfo>
+    {
+        { XsdType::NONE,                   NamespaceInfo{ "", "" } },
+        { XsdType::AUTOMATION_CONSTRAINTS, NamespaceInfo{ "", "http://pacificbiosciences.com/PacBioAutomationConstraints.xsd" } },
+        { XsdType::BASE_DATA_MODEL,        NamespaceInfo{ "pbbase", "http://pacificbiosciences.com/PacBioBaseDataModel.xsd" } },
+        { XsdType::COLLECTION_METADATA,    NamespaceInfo{ "pbmeta", "http://pacificbiosciences.com/PacBioCollectionMetadata.xsd" } },
+        { XsdType::COMMON_MESSAGES,        NamespaceInfo{ "", "http://pacificbiosciences.com/PacBioCommonMessages.xsd" } },
+        { XsdType::DATA_MODEL,             NamespaceInfo{ "pbdm", "http://pacificbiosciences.com/PacBioDataModel.xsd" } },
+        { XsdType::DATA_STORE,             NamespaceInfo{ "", "http://pacificbiosciences.com/PacBioDataStore.xsd" } },
+        { XsdType::DATASETS,               NamespaceInfo{ "pbds", "http://pacificbiosciences.com/PacBioDatasets.xsd" } },
+        { XsdType::DECL_DATA,              NamespaceInfo{ "", "http://pacificbiosciences.com/PacBioDeclData.xsd" } },
+        { XsdType::PART_NUMBERS,           NamespaceInfo{ "pbpn", "http://pacificbiosciences.com/PacBioPartNumbers.xsd" } },
+        { XsdType::PRIMARY_METRICS,        NamespaceInfo{ "", "http://pacificbiosciences.com/PacBioPrimaryMetrics.xsd" } },
+        { XsdType::REAGENT_KIT,            NamespaceInfo{ "pbrk", "http://pacificbiosciences.com/PacBioReagentKit.xsd" } },
+        { XsdType::RIGHTS_AND_ROLES,       NamespaceInfo{ "", "http://pacificbiosciences.com/PacBioRightsAndRoles.xsd" } },
+        { XsdType::SAMPLE_INFO,            NamespaceInfo{ "pbsample", "http://pacificbiosciences.com/PacBioSampleInfo.xsd" } },
+        { XsdType::SEEDING_DATA,           NamespaceInfo{ "", "http://pacificbiosciences.com/PacBioSeedingData.xsd" } }
+    };
+}
+
+/// Lookup table from XML element label to the XSD that defines it; consulted
+/// by NamespaceRegistry::XsdForElement(). Labels that are not listed resolve
+/// to XsdType::NONE there.
+///
+/// The DataSetMetadata key used to be misspelled "DataSetMetdata", so the
+/// real element label was never found.
+static const auto elementRegistry = unordered_map<string, XsdType>
+{
+    // 'pbbase' elements
+    //
+    { "AutomationParameter",  XsdType::BASE_DATA_MODEL },
+    { "AutomationParameters", XsdType::BASE_DATA_MODEL },
+    { "BinCount",             XsdType::BASE_DATA_MODEL },
+    { "BinCounts",            XsdType::BASE_DATA_MODEL },
+    { "BinLabel",             XsdType::BASE_DATA_MODEL },
+    { "BinLabels",            XsdType::BASE_DATA_MODEL },
+    { "BinWidth",             XsdType::BASE_DATA_MODEL },
+    { "ExternalResource",     XsdType::BASE_DATA_MODEL },
+    { "ExternalResources",    XsdType::BASE_DATA_MODEL },
+    { "FileIndex",            XsdType::BASE_DATA_MODEL },
+    { "FileIndices",          XsdType::BASE_DATA_MODEL },
+    { "MaxBinValue",          XsdType::BASE_DATA_MODEL },
+    { "MaxOutlierValue",      XsdType::BASE_DATA_MODEL },
+    { "MetricDescription",    XsdType::BASE_DATA_MODEL },
+    { "NumBins",              XsdType::BASE_DATA_MODEL },
+    { "Properties",           XsdType::BASE_DATA_MODEL },
+    { "Property",             XsdType::BASE_DATA_MODEL },
+    { "Sample95thPct",        XsdType::BASE_DATA_MODEL },
+    { "SampleMean",           XsdType::BASE_DATA_MODEL },
+    { "SampleMed",            XsdType::BASE_DATA_MODEL },
+    { "SampleSize",           XsdType::BASE_DATA_MODEL },
+    { "SampleStd",            XsdType::BASE_DATA_MODEL },
+
+    // 'pbds' elements
+    //
+    { "AdapterDimerFraction",  XsdType::DATASETS },
+    { "AlignmentSet",          XsdType::DATASETS },
+    { "BarcodeConstruction",   XsdType::DATASETS },
+    { "BarcodeSet",            XsdType::DATASETS },
+    { "ConsensusAlignmentSet", XsdType::DATASETS },
+    { "ConsensusReadSet",      XsdType::DATASETS },
+    { "Contig",                XsdType::DATASETS },
+    { "Contigs",               XsdType::DATASETS },
+    { "ContigSet",             XsdType::DATASETS },
+    { "ControlReadLenDist",    XsdType::DATASETS },
+    { "ControlReadQualDist",   XsdType::DATASETS },
+    { "DataSetMetadata",       XsdType::DATASETS },
+    { "DataSet",               XsdType::DATASETS },
+    { "DataSets",              XsdType::DATASETS },
+    { "Filter",                XsdType::DATASETS },
+    { "Filters",               XsdType::DATASETS },
+    { "HdfSubreadSet",         XsdType::DATASETS },
+    { "InsertReadLenDist",     XsdType::DATASETS },
+    { "InsertReadQualDist",    XsdType::DATASETS },
+    { "MedianInsertDist",      XsdType::DATASETS },
+    { "NumRecords",            XsdType::DATASETS },
+    { "NumSequencingZmws",     XsdType::DATASETS },
+    { "Organism",              XsdType::DATASETS },
+    { "ParentTool",            XsdType::DATASETS },
+    { "Ploidy",                XsdType::DATASETS },
+    { "ProdDist",              XsdType::DATASETS },
+    { "Provenance",            XsdType::DATASETS },
+    { "ReadLenDist",           XsdType::DATASETS },
+    { "ReadQualDist",          XsdType::DATASETS },
+    { "ReadTypeDist",          XsdType::DATASETS },
+    { "ReferenceSet",          XsdType::DATASETS },
+    { "ShortInsertFraction",   XsdType::DATASETS },
+    { "SubreadSet",            XsdType::DATASETS },
+    { "SummaryStats",          XsdType::DATASETS },
+    { "TotalLength",           XsdType::DATASETS },
+
+    // 'pbmeta' elements
+    //
+    { "Automation",           XsdType::COLLECTION_METADATA },
+    { "AutomationName",       XsdType::COLLECTION_METADATA },
+    { "CellIndex",            XsdType::COLLECTION_METADATA },
+    { "CellPac",              XsdType::COLLECTION_METADATA },
+    { "CollectionFileCopy",   XsdType::COLLECTION_METADATA },
+    { "CollectionMetadata",   XsdType::COLLECTION_METADATA },
+    { "CollectionNumber",     XsdType::COLLECTION_METADATA },
+    { "CollectionPathUri",    XsdType::COLLECTION_METADATA },
+    { "Collections",          XsdType::COLLECTION_METADATA },
+    { "Concentration",        XsdType::COLLECTION_METADATA },
+    { "ConfigFileName",       XsdType::COLLECTION_METADATA },
+    { "CopyFiles",            XsdType::COLLECTION_METADATA },
+    { "InstCtrlVer",          XsdType::COLLECTION_METADATA },
+    { "MetricsVerbosity",     XsdType::COLLECTION_METADATA },
+    { "Name",                 XsdType::COLLECTION_METADATA },
+    { "OutputOptions",        XsdType::COLLECTION_METADATA },
+    { "PlateId",              XsdType::COLLECTION_METADATA },
+    { "Primary",              XsdType::COLLECTION_METADATA },
+    { "Readout",              XsdType::COLLECTION_METADATA },
+    { "ResultsFolder",        XsdType::COLLECTION_METADATA },
+    { "RunDetails",           XsdType::COLLECTION_METADATA },
+    { "RunId",                XsdType::COLLECTION_METADATA },
+    { "SampleReuseEnabled",   XsdType::COLLECTION_METADATA },
+    { "SequencingCondition",  XsdType::COLLECTION_METADATA },
+    { "SigProcVer",           XsdType::COLLECTION_METADATA },
+    { "SizeSelectionEnabled", XsdType::COLLECTION_METADATA },
+    { "StageHotstartEnabled", XsdType::COLLECTION_METADATA },
+    { "UseCount",             XsdType::COLLECTION_METADATA },
+    { "WellName",             XsdType::COLLECTION_METADATA },
+    { "WellSample",           XsdType::COLLECTION_METADATA },
+
+    // 'pbsample' elements
+    //
+    { "BioSample",         XsdType::SAMPLE_INFO },
+    { "BioSamplePointer",  XsdType::SAMPLE_INFO },
+    { "BioSamplePointers", XsdType::SAMPLE_INFO },
+    { "BioSamples",        XsdType::SAMPLE_INFO }
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// ---------------
+// NamespaceInfo
+// ---------------
+
+/// \brief Constructs a NamespaceInfo with default-initialized name and URI.
+NamespaceInfo::NamespaceInfo(void)
+{
+}
+
+/// \brief Constructs a NamespaceInfo from a namespace name and its URI.
+///
+/// \param[in] name  namespace name, stored in name_
+/// \param[in] uri   namespace URI, stored in uri_
+NamespaceInfo::NamespaceInfo(const string& name, const string& uri)
+    : name_(name), uri_(uri)
+{
+}
+
+// -------------------
+// NamespaceRegistry
+// -------------------
+
+/// \brief Constructs a registry holding the default namespace table, with
+///        DATASETS as the default XSD.
+NamespaceRegistry::NamespaceRegistry(void)
+    : data_(internal::DefaultRegistry()), defaultXsdType_(XsdType::DATASETS)
+{
+}
+
+// Copy, move and destruction are plain memberwise operations on the namespace
+// table and the default XSD type, so the compiler-generated versions are used.
+
+/// \brief Copy constructor (memberwise).
+NamespaceRegistry::NamespaceRegistry(const NamespaceRegistry &other) = default;
+
+/// \brief Move constructor (memberwise).
+NamespaceRegistry::NamespaceRegistry(NamespaceRegistry &&other) = default;
+
+/// \brief Copy assignment (memberwise).
+NamespaceRegistry& NamespaceRegistry::operator=(const NamespaceRegistry& other) = default;
+
+/// \brief Move assignment (memberwise).
+NamespaceRegistry& NamespaceRegistry::operator=(NamespaceRegistry&& other) = default;
+
+/// \brief Destructor; nothing beyond member destruction.
+NamespaceRegistry::~NamespaceRegistry(void) = default;
+
+/// \brief Returns the namespace info registered for the default XSD type.
+const NamespaceInfo& NamespaceRegistry::DefaultNamespace(void) const
+{
+    const XsdType defaultXsd = DefaultXsd();
+    return Namespace(defaultXsd);
+}
+
+/// \brief Returns the XSD type currently set as default.
+XsdType NamespaceRegistry::DefaultXsd(void) const
+{
+    return defaultXsdType_;
+}
+
+/// \brief Returns the namespace info registered for \p xsd.
+///
+/// The lookup uses the table's at(), so an unregistered \p xsd is reported by
+/// an exception rather than by inserting an entry.
+const NamespaceInfo& NamespaceRegistry::Namespace(const XsdType& xsd) const
+{
+    return data_.at(xsd);
+}
+
+/// \brief Registers \p namespaceInfo for \p xsd, replacing any existing entry.
+void NamespaceRegistry::Register(const XsdType& xsd, const NamespaceInfo& namespaceInfo)
+{
+    data_[xsd] = namespaceInfo;
+}
+
+/// \brief Sets the XSD type reported by DefaultXsd() / DefaultNamespace().
+void NamespaceRegistry::SetDefaultXsd(const XsdType& xsd)
+{
+    defaultXsdType_ = xsd;
+}
+
+/// \brief Looks up the XSD type that defines the element \p elementLabel.
+///
+/// \returns the XSD type from the built-in element table, or XsdType::NONE if
+///          the label is not listed there
+XsdType NamespaceRegistry::XsdForElement(const std::string& elementLabel) const
+{
+    const auto match = internal::elementRegistry.find(elementLabel);
+    if (match == internal::elementRegistry.cend())
+        return XsdType::NONE;
+    return match->second;
+}
+
+/// \brief Finds the XSD type whose registered namespace URI equals \p uri.
+///
+/// Entries are scanned in the table's key order and the first match wins.
+///
+/// \returns the matching XSD type, or XsdType::NONE if no entry has that URI
+XsdType NamespaceRegistry::XsdForUri(const std::string& uri) const
+{
+    for (const auto& entry : data_)
+    {
+        const NamespaceInfo& info = entry.second;
+        if (info.Uri() == uri)
+            return entry.first;
+    }
+    return XsdType::NONE;
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file EntireFileQuery.cpp
+/// \brief Implements the EntireFileQuery class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/EntireFileQuery.h"
+#include "pbbam/CompositeBamReader.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+/// \brief Private implementation of EntireFileQuery; holds the reader that
+///        supplies the records.
+struct EntireFileQuery::EntireFileQueryPrivate
+{
+    /// Constructs the reader from \p dataset.
+    EntireFileQueryPrivate(const DataSet& dataset)
+        : reader_(dataset)
+    {
+    }
+
+    SequentialCompositeBamReader reader_;  ///< record source, built from the dataset
+};
+
+/// \brief Constructs a query over \p dataset.
+EntireFileQuery::EntireFileQuery(const DataSet &dataset)
+    : internal::IQuery(), d_(new EntireFileQueryPrivate(dataset))
+{
+}
+
+/// \brief Destructor.
+EntireFileQuery::~EntireFileQuery(void)
+{
+}
+
+/// \brief Fetches the next record into \p r by forwarding to the underlying
+///        reader's GetNext().
+/// \returns whatever the reader's GetNext() returns
+bool EntireFileQuery::GetNext(BamRecord &r)
+{
+    const bool gotRecord = d_->reader_.GetNext(r);
+    return gotRecord;
+}
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file EnumClassHash.h
+/// \brief Defines the EnumClassHash class.
+//
+// Author: Derek Barnett
+
+#ifndef ENUMCLASSHASH_H
+#define ENUMCLASSHASH_H
+
+#include <cstddef>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+///
+/// \brief The EnumClassHash struct enables the use of enum class types as keys
+/// for std::unordered_map.
+///
+/// Allows something like:
+///
+/// \code{.cpp}
+/// std::unordered_map<Key_t, Value_t, EnumClassHash> myLookup;
+/// \endcode
+///
+/// where Key_t is an enum class. Without this sort of extra hand-holding to
+/// provide a 'manual' hash value, enum classes as keys will fail to compile.
+///
+/// \note This approach might be unnecessary in C++14, if I understand some of
+/// the changes correctly. But this works for C++11 and should continue beyond.
+///
+/// \sa http://stackoverflow.com/questions/18837857/cant-use-enum-class-as-unordered-map-key
+///
+struct EnumClassHash
+{
+    // *** NOTE ***
+    //
+    // Remove this when we integrate pbcopper.
+    // This is a duplicate of pbcopper/utility/EnumClassHash.h
+    //
+
+    /// \brief Hashes \p t by converting it to std::size_t.
+    ///
+    /// The type is spelled std::size_t because that is the only name
+    /// <cstddef> guarantees; the unqualified global alias is optional.
+    ///
+    /// \param[in] t  value to hash (intended for enum class values)
+    /// \returns the value of \p t converted to std::size_t
+    template<typename T> std::size_t operator()(const T t) const
+    { return static_cast<std::size_t>(t); }
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // ENUMCLASSHASH_H
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file FastaReader.cpp
+/// \brief Implements the FastaReader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/FastaReader.h"
+#include <htslib/faidx.h>
+#include <stdexcept>
+#include <fstream>
+#include <iostream>
+#include <limits>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief Private implementation of FastaReader.
+///
+/// Streams FASTA records from a file. The next record is always fetched ahead
+/// of time into name_/bases_, so GetNext() only has to hand it over.
+struct FastaReaderPrivate
+{
+    ifstream stream_;  ///< input file
+    string name_;      ///< header of the pre-fetched record, without the '>'
+    string bases_;     ///< sequence lines of the pre-fetched record, joined
+
+    /// \brief Opens \p fn and pre-fetches the first record.
+    /// \throws std::runtime_error if the file cannot be opened
+    FastaReaderPrivate(const std::string& fn)
+        : stream_(fn)
+    {
+        if (!stream_)
+            throw std::runtime_error("FastaReader - could not open " + fn + " for reading");
+        FetchNext();
+    }
+
+    /// \brief Hands the pre-fetched record to \p record and fetches the next.
+    /// \returns false (leaving \p record untouched) when no record is pending,
+    ///          i.e. both the pending name and the pending bases are empty
+    bool GetNext(FastaSequence& record)
+    {
+        if (name_.empty() && bases_.empty())
+            return false;
+        record = FastaSequence { name_, bases_ };
+        FetchNext();
+        return true;
+    }
+
+private:
+    /// Clears the pending record and reads the next one from the stream.
+    void FetchNext(void)
+    {
+        name_.clear();
+        bases_.clear();
+
+        SkipNewlines();
+        ReadName();
+        ReadBases();
+    }
+
+    /// Discards every blank line in front of the next record. Previously only
+    /// one was skipped, so a second blank line was taken for record data.
+    inline void SkipNewlines(void)
+    {
+        while (stream_ && stream_.peek() == '\n')
+            stream_.get();
+    }
+
+    /// Reads the header line into name_ if the next character is '>'.
+    /// The character is only consumed when it really is '>', so the first
+    /// base of a sequence without a header is no longer swallowed.
+    void ReadName(void)
+    {
+        if (!stream_)
+            return;
+        if (stream_.peek() != '>')
+            return;
+        stream_.get();  // drop the '>'
+        std::getline(stream_, name_, '\n');
+    }
+
+    /// Appends sequence lines to bases_ until the next header or end of file.
+    void ReadBases(void)
+    {
+        string line;
+        while (stream_) {
+            // Stop explicitly at end of file. Without this test the loop ran
+            // once more after peek() hit EOF; getline() then failed without
+            // clearing 'line', and the last sequence line was appended twice.
+            const int next = stream_.peek();
+            if (next == std::ifstream::traits_type::eof() || next == '>')
+                break;
+            std::getline(stream_, line, '\n');
+            bases_ += line;
+        }
+    }
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+/// \brief Opens the FASTA file \p fn for reading.
+///
+/// Errors from the private implementation's constructor (it throws
+/// std::runtime_error when the file cannot be opened) propagate to the caller.
+FastaReader::FastaReader(const std::string& fn)
+    : d_{ new internal::FastaReaderPrivate{ fn } }
+{
+}
+
+/// \brief Move constructor; takes over the implementation of \p other.
+FastaReader::FastaReader(FastaReader&& other)
+    : d_{ std::move(other.d_) }
+{
+}
+
+/// \brief Move assignment, implemented by swapping implementations with
+///        \p other.
+/// \returns reference to this reader
+FastaReader& FastaReader::operator=(FastaReader&& other)
+{
+    d_.swap(other.d_);
+    return *this;
+}
+
+/// \brief Destructor.
+FastaReader::~FastaReader(void)
+{
+}
+
+/// \brief Fetches the next sequence into \p record.
+/// \returns the result of the private implementation's GetNext()
+bool FastaReader::GetNext(FastaSequence& record)
+{
+    const bool gotRecord = d_->GetNext(record);
+    return gotRecord;
+}
+
+/// \brief Reads every sequence of the FASTA file \p fn into memory.
+///
+/// \param[in] fn  FASTA filename
+/// \returns all sequences, in file order
+vector<FastaSequence> FastaReader::ReadAll(const string& fn)
+{
+    vector<FastaSequence> sequences;
+    sequences.reserve(256);
+
+    FastaReader reader{ fn };
+    for (FastaSequence current; reader.GetNext(current); )
+        sequences.emplace_back(current);
+
+    return sequences;
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "FileProducer.h"
+#include <exception>
+#include <cstdio>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// Uses "<targetFilename>.tmp" as the temp file.
+FileProducer::FileProducer(const string& targetFilename)
+    : FileProducer(targetFilename, targetFilename + ".tmp")
+{
+}
+
+// Uses an explicit temp filename.
+//
+// When the target is stdout ("-"), the temp filename is forced to "-" as
+// well, so derived classes can always operate on the temp filename.
+FileProducer::FileProducer(const string& targetFilename,
+                           const string& tempFilename)
+    : targetFilename_(targetFilename)
+    , tempFilename_((targetFilename == "-") ? string("-") : tempFilename)
+{
+}
+
+// Renames the temp file to the target filename, unless output went to
+// stdout or the destructor is running because an exception is propagating.
+//
+// The return value of std::rename is not checked; a failed rename is silent.
+FileProducer::~FileProducer(void)
+{
+    // writing to stdout - there is no file to rename
+    if (tempFilename_ == "-")
+        return;
+
+    // std::current_exception() only reports an exception once a handler is
+    // active, so it is still null while the stack is being unwound and the
+    // half-written temp file would be renamed anyway. uncaught_exception()
+    // is the query that is true during unwinding. (C++11/14 spelling; use
+    // std::uncaught_exceptions() if the project moves to C++17 or later.)
+    if (std::uncaught_exception())
+        return;
+
+    std::rename(tempFilename_.c_str(),
+                targetFilename_.c_str());
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef FILEPRODUCER_H
+#define FILEPRODUCER_H
+
+#include <string>
+#include <stdio.h>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// The FileProducer class provides functionality for working with a temp
+// file until successful destruction of a FileProducer-derived class.
+//
+// Derived classes should be sure to flush/close the temp file, and the
+// FileProducer's destructor will ensure that the temp file will be renamed to
+// the target filename.
+//
+// If destruction is triggered by an exception, no renaming will occur.
+//
+class FileProducer {
+
+protected:
+ // A FileProducer always needs a target filename.
+ FileProducer(void) = delete;
+
+ // Initializes FileProducer with specified target filename. Temp filename is
+ // set to target filename plus ".tmp" suffix.
+ FileProducer(const std::string& targetFilename);
+
+ // Initializes FileProducer with specified target filename & explicit temp
+ // filename.
+ //
+ // If the target filename is "-" (stdout), the temp filename is set to "-"
+ // as well and no renaming takes place on destruction.
+ FileProducer(const std::string& targetFilename,
+ const std::string& tempFilename);
+
+ // Renames temp file to target filename.
+ //
+ // Derived classes should ensure that data is flushed and file handle closed
+ // before or during their destructor.
+ //
+ // Renaming will not occur if there is a 'live' exception being thrown.
+ //
+ ~FileProducer(void);
+
+protected:
+ // Final output filename ("-" for stdout).
+ const std::string& TargetFilename(void) const
+ { return targetFilename_; }
+
+ // Filename that derived classes should actually write to.
+ const std::string& TempFilename(void) const
+ { return tempFilename_; }
+
+private:
+ std::string targetFilename_;
+ std::string tempFilename_;
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // FILEPRODUCER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "FileUtils.h"
+#include "StringUtils.h"
+#include <boost/algorithm/string.hpp>
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <cassert>
+#include <sys/stat.h>
+#include <unistd.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Pops the "file://" scheme off the front of a URI/filepath, if found.
+//
+// Input without the scheme - including an empty string - is returned
+// unchanged, so callers can apply their own empty-path handling afterwards
+// in debug builds too.
+//
+// Throws std::runtime_error if "file://" occurs anywhere other than at the
+// very beginning of 'uri'.
+static string removeFileUriScheme(const string& uri)
+{
+    const string fileScheme("file://");
+
+    const auto schemeFound = uri.find(fileScheme);
+    if (schemeFound == string::npos)
+        return uri;
+    if (schemeFound != 0)
+        throw runtime_error("Malformed URI: scheme not at beginning");
+    return uri.substr(fileScheme.size());
+}
+
+#ifdef PBBAM_WIN_FILEPATHS
+
+// Strips a leading drive specifier ("C:") from a Windows path, if present.
+// Paths without a drive specifier are returned unchanged.
+static
+string removeDiskName(const string& filePath)
+{
+    if (filePath.size() >= 2) {
+        // isalpha() is undefined for negative values, so the (possibly
+        // signed) char must be converted to unsigned char first
+        const unsigned char firstChar = static_cast<unsigned char>(filePath.at(0));
+        if ((isalpha(firstChar) != 0) && (filePath.at(1) == ':'))
+            return filePath.substr(2);
+    }
+    return filePath;
+}
+
+static const char native_pathSeparator = '\\';
+
+// Returns true if 'filePath' (expected non-empty) is an absolute Windows
+// path: it starts with a backslash, optionally preceded by a drive
+// specifier such as "C:".
+static bool native_pathIsAbsolute(const string& filePath)
+{
+    assert(!filePath.empty());
+
+    // if starts with single slash or double slash
+    if (boost::algorithm::starts_with(filePath, "\\"))
+        return true;
+
+    // if starts with single or double-dots -> not absolute
+    if (boost::algorithm::starts_with(filePath, "."))
+        return false;
+
+    // if starts with disk drive name and colon ("C:\foo\bar.txt")
+    // strip the drive name and check to see if the remaining path is absolute
+    if (filePath.size() >= 2) {
+        // convert to unsigned char: isalpha() is undefined for negative values
+        const unsigned char firstChar = static_cast<unsigned char>(filePath.at(0));
+        if ((isalpha(firstChar) != 0) && (filePath.at(1) == ':')) {
+            const string remainder = removeDiskName(filePath);
+            // a bare drive specifier ("C:") leaves nothing to inspect; treat
+            // it as relative instead of recursing with an empty path, which
+            // would trip the non-empty assertion above
+            return !remainder.empty() && native_pathIsAbsolute(remainder);
+        }
+    }
+
+    // otherwise, likely relative
+    return false;
+}
+
+// Windows flavour of path resolution.
+//
+// Strips a "file://" scheme, returns empty or absolute paths as they are,
+// and otherwise prepends 'from' plus the native separator to the path after
+// removing its drive specifier and a leading ".\" component.
+static string native_resolvedFilePath(const string& filePath,
+                                      const string& from)
+{
+    string path = removeFileUriScheme(filePath);
+
+    // nothing to resolve for empty or absolute paths
+    if (path.empty() || native_pathIsAbsolute(path))
+        return path;
+
+    // relative path: drop the drive name, then a leading ".\" - the 'from'
+    // directory is prepended below, so the "this directory" marker is
+    // redundant (even when from == ".")
+    path = removeDiskName(path);
+
+    const string thisDirPrefix{ '.', native_pathSeparator };
+    if (path.compare(0, thisDirPrefix.size(), thisDirPrefix) == 0)
+        path.erase(0, thisDirPrefix.size());
+
+    return from + native_pathSeparator + path;
+}
+
+#else // else for non-Windows systems
+
+static const char native_pathSeparator = '/';
+
+// Returns true if 'filePath' begins with '/'.
+// string::at() throws std::out_of_range when 'filePath' is empty.
+static bool native_pathIsAbsolute(const string& filePath)
+{
+    const char leading = filePath.at(0);
+    return leading == '/';
+}
+
+// Non-Windows flavour of path resolution.
+//
+// Strips a "file://" scheme, returns empty or absolute paths as they are,
+// and otherwise prepends 'from' plus the native separator to the path after
+// removing a leading "./" component.
+static string native_resolvedFilePath(const string& filePath,
+                                      const string& from)
+{
+    string path = removeFileUriScheme(filePath);
+
+    // nothing to resolve for empty or absolute paths
+    if (path.empty() || native_pathIsAbsolute(path))
+        return path;
+
+    // relative path: the 'from' directory is prepended below, so a leading
+    // "./" is redundant (even when from == ".") and is dropped to keep the
+    // result consistent across 'from' directories
+    const bool startsWithThisDir = (path.size() >= 2) &&
+                                   (path[0] == '.') &&
+                                   (path[1] == native_pathSeparator);
+    if (startsWithThisDir)
+        path.erase(0, 2);
+
+    return from + native_pathSeparator + path;
+}
+
+#endif // PBBAM_WIN_FILEPATHS
+
+// see http://stackoverflow.com/questions/2869594/how-return-a-stdstring-from-cs-getcwd-function
+//
+// Returns the current working directory as reported by getcwd().
+//
+// A 1024-byte stack buffer is tried first; when getcwd() reports ERANGE,
+// heap buffers of 2..19 chunks of 1024 bytes are tried in turn.
+// Throws std::runtime_error if getcwd() fails for any other reason or the
+// path does not fit into the largest buffer.
+string FileUtils::CurrentWorkingDirectory(void)
+{
+    const size_t chunkSize = 1024;
+    const size_t maxNumChunks = 20;
+
+    // stack-based buffer for 'normal' case
+    char buffer[chunkSize];
+    if (getcwd(buffer, sizeof(buffer)) != nullptr)
+        return string(buffer);
+
+    // if error is not ERANGE, then it's not a problem of too-long name... something else happened
+    if (errno != ERANGE)
+        throw runtime_error("could not determine current working directory path");
+
+    // long path - use heap, trying progressively longer buffers
+    for (size_t chunks = 2; chunks < maxNumChunks; ++chunks) {
+        const size_t bufferSize = chunkSize * chunks;
+
+        // array form of unique_ptr, so the buffer is released with delete[]
+        unique_ptr<char[]> cwd(new char[bufferSize]);
+        if (getcwd(cwd.get(), bufferSize) != nullptr)
+            return string(cwd.get());
+
+        // if error is not ERANGE, then it's not a problem of too-long name... something else happened
+        if (errno != ERANGE)
+            throw runtime_error("could not determine current working directory path");
+    }
+
+    // crazy long path name
+    throw runtime_error("could not determine current working directory - extremely long path");
+}
+
+// Returns everything in front of the last path separator in 'file', or "."
+// when 'file' contains no separator at all.
+string FileUtils::DirectoryName(const string& file)
+{
+    const size_t lastSeparator = file.rfind(Separator());
+    return (lastSeparator == string::npos) ? string(".")
+                                           : file.substr(0, lastSeparator);
+}
+
+// Returns true if stat() succeeds for the path 'fn'.
+bool FileUtils::Exists(const char* fn)
+{
+    struct stat info;
+    const int status = stat(fn, &info);
+    return status != -1;
+}
+
+// Returns the st_mtime reported by stat() for 'fn' as a system_clock
+// time point. Throws std::runtime_error if stat() fails.
+chrono::system_clock::time_point FileUtils::LastModified(const char* fn)
+{
+    struct stat info;
+    const bool statFailed = (stat(fn, &info) != 0);
+    if (statFailed)
+        throw runtime_error("could not get file timestamp");
+
+    return chrono::system_clock::from_time_t(info.st_mtime);
+}
+
+// Forwards to the platform-specific resolver selected at compile time.
+string FileUtils::ResolvedFilePath(const string& filePath,
+                                   const string& from)
+{
+    return native_resolvedFilePath(filePath, from);
+}
+
+// Path separator of the platform this file was compiled for.
+constexpr char FileUtils::Separator(void)
+{
+    return native_pathSeparator;
+}
+
+// Returns the st_size reported by stat() for 'fn'.
+// Throws std::runtime_error if stat() fails.
+off_t FileUtils::Size(const char* fn)
+{
+    struct stat info;
+    const bool statFailed = (stat(fn, &info) != 0);
+    if (statFailed)
+        throw runtime_error("could not determine file size");
+
+    return info.st_size;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef FILEUTILS_H
+#define FILEUTILS_H
+
+#include <chrono>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// Collection of static helpers for file paths and file metadata.
+struct FileUtils
+{
+public:
+
+ /// \returns application's current working directory
+ /// \throws runtime_error if the directory cannot be determined
+ static std::string CurrentWorkingDirectory(void);
+
+ /// Parses a filepath for the directory name of a file.
+ ///
+ /// Essentially this method strips the filename from the string provided (/path/to/file => /path/to).
+ /// If only a filename is provided, then "." is returned to indicate the current directory.
+ ///
+ /// \param[in] file name of file (can be just a filename or path/to/filename)
+ /// \returns file's directory name
+ ///
+ static std::string DirectoryName(const std::string& file);
+
+ /// Check for existence of a file.
+ ///
+ /// \param[in] fn full path to file
+ /// \returns true if file information can be queried (stat() succeeds) for the path
+ ///
+ static bool Exists(const char* fn);
+
+ /// Check for existence of a file.
+ ///
+ /// \param[in] fn full path to file
+ /// \returns true if file information can be queried (stat() succeeds) for the path
+ ///
+ static bool Exists(const std::string& fn);
+
+ /// Check "last modified" timestamp for a file.
+ ///
+ /// \param[in] fn full path to file
+ /// \returns time of last modification
+ /// \throws runtime_error if file info can't be accessed
+ ///
+ static std::chrono::system_clock::time_point LastModified(const char* fn);
+
+ /// Check "last modified" timestamp for a file.
+ ///
+ /// \param[in] fn full path to file
+ /// \returns time of last modification
+ /// \throws runtime_error if file info can't be accessed
+ ///
+ static std::chrono::system_clock::time_point LastModified(const std::string& fn);
+
+ /// Resolves input file path using optional starting directory.
+ ///
+ /// \verbatim
+ /// /absolute/path/to/file.txt => /absolute/path/to/file.txt
+ /// ../relative/path/to/file.txt => <from>/../relative/path/to/file.txt
+ /// file.txt => <from>/file.txt
+ /// \endverbatim
+ ///
+ /// \note This method will strip any URI scheme as well ("file://") so that the result is immediately ready for I/O operations.
+ ///
+ /// \param[in] filePath file path to be resolved
+ /// \param[in] from optional starting directory (useful if not same as application's working directory)
+ /// \returns resolved file path
+ /// \throws runtime_error if "file://" appears anywhere but at the start of filePath
+ ///
+ static std::string ResolvedFilePath(const std::string& filePath,
+ const std::string& from = ".");
+
+ /// \returns native path separator
+ constexpr static char Separator(void);
+
+ /// Check size of file.
+ ///
+ /// \param[in] fn full path to file
+ /// \returns file size in bytes
+ /// \throws runtime_error if file info can't be accessed
+ ///
+ static off_t Size(const char* fn);
+
+ /// Check size of file.
+ ///
+ /// \param[in] fn full path to file
+ /// \returns file size in bytes
+ /// \throws runtime_error if file info can't be accessed
+ ///
+ static off_t Size(const std::string& fn);
+};
+
+// std::string conveniences: each forwards to its const char* counterpart.
+
+inline bool FileUtils::Exists(const std::string& fn)
+{
+    const char* const path = fn.c_str();
+    return FileUtils::Exists(path);
+}
+
+inline std::chrono::system_clock::time_point FileUtils::LastModified(const std::string& fn)
+{
+    const char* const path = fn.c_str();
+    return FileUtils::LastModified(path);
+}
+
+inline off_t FileUtils::Size(const std::string& fn)
+{
+    const char* const path = fn.c_str();
+    return FileUtils::Size(path);
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // FILEUTILS_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "FofnReader.h"
+#include <iostream>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// Reads 'in' line by line and returns every line, in order, as one entry.
+// Lines are stored exactly as read; empty lines are not filtered out.
+vector<string> FofnReader::Files(istream& in)
+{
+    vector<string> filenames;
+    for (string line; getline(in, line); )
+        filenames.push_back(line);
+    return filenames;
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef FOFNREADER_H
+#define FOFNREADER_H
+
+#include "pbbam/DataSet.h"
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Reader for FOFN input: a text stream that lists one filename per line.
+class FofnReader
+{
+public:
+ // Returns the lines of 'in', in order, one vector entry per line.
+ static std::vector<std::string> Files(std::istream& in);
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // FOFNREADER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Frames.cpp
+/// \brief Implements the Frames class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Frames.h"
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Lookup tables shared by the encode/decode helpers below; they are filled
+// once by InitIpdDownsampling().
+static vector<uint16_t> framepoints; // code -> frame count
+static vector<uint8_t> frameToCode; // frame count -> code
+static uint16_t maxFramepoint; // largest frame count that has a code
+
+// Builds the framepoints / frameToCode tables on first use; later calls
+// return immediately because framepoints is already populated.
+//
+// NOTE(review): no synchronization is visible here, so concurrent first
+// calls would race on the tables - confirm how callers use this.
+static
+void InitIpdDownsampling(void)
+{
+ if (!framepoints.empty())
+ return;
+
+ // liftover from Dave's python code:
+ // .../bioinformatics/tools/kineticsTools/kineticsTools/_downsampling.py
+
+ const int B = 2;
+ const int t = 6;
+ const double T = pow(B, t); // 2^6 = 64 framepoints per group
+
+ int next = 0;
+ double grain;
+ const int end = 256/T; // 256/64 = 4 groups
+ // each group appends T framepoints spaced 'grain' apart (1, 2, 4, 8),
+ // starting where the previous group left off
+ for (int i = 0; i < end; ++i) {
+ grain = pow(B, i);
+ vector<uint16_t> nextOnes;
+ for (double j = 0; j < T; ++j)
+ nextOnes.push_back(j*grain + next);
+ next = nextOnes.back() + grain;
+ framepoints.insert(framepoints.end(), nextOnes.cbegin(), nextOnes.cend());
+ }
+ // every framepoint index must fit into a uint8_t code
+ assert(framepoints.size()-1 <= UINT8_MAX);
+
+ // one frameToCode slot for every frame count from 0 up to the largest framepoint
+ const uint16_t maxElement = (*max_element(framepoints.cbegin(), framepoints.cend()));
+ frameToCode.assign(maxElement+1, 0);
+
+ const int fpEnd = framepoints.size() - 1;
+ uint8_t i = 0;
+ uint16_t fl = 0; // lower framepoint of the current pair
+ uint16_t fu = 0; // upper framepoint of the current pair
+ for (; i < fpEnd; ++i) {
+ fl = framepoints[i];
+ fu = framepoints[i+1];
+ if (fu > fl+1) {
+ // gap between neighbours: frames below the midpoint take the lower
+ // code, frames from the midpoint upwards take the upper code
+ const int middle = (fl+fu)/2;
+ for (int f = fl; f < middle; ++f)
+ frameToCode[f] = i;
+ for (int f = middle; f < fu; ++f)
+ frameToCode[f] = i+1;
+ } else
+ frameToCode[fl] = i;
+ }
+
+ // this next line differs from the python implementation (there, it's "i+1")
+ // our C++ for loop has incremented our index counter one more time than the indexes from python enumerate(...)
+ frameToCode[fu] = i;
+ maxFramepoint = fu;
+}
+
+// Looks up the frame count stored for a single 8-bit code.
+// Does not fill the lookup table itself; the vector overload calls
+// InitIpdDownsampling() before using this.
+static inline
+uint16_t CodeToFrames(const uint8_t code)
+{
+    const uint16_t frame = framepoints[code];
+    return frame;
+}
+
+// Decodes a sequence of 8-bit codes into frame counts, element by element.
+// Makes sure the lookup tables are initialized first.
+static
+vector<uint16_t> CodeToFrames(const vector<uint8_t>& codedData)
+{
+    InitIpdDownsampling();
+
+    vector<uint16_t> decoded;
+    decoded.reserve(codedData.size());
+    for (const uint8_t code : codedData)
+        decoded.push_back(CodeToFrames(code));
+    return decoded;
+}
+
+// Maps a single frame count to its 8-bit code. Frame counts above
+// maxFramepoint are clamped to maxFramepoint before the lookup.
+// Does not fill the lookup table itself; the vector overload calls
+// InitIpdDownsampling() before using this.
+static inline
+uint8_t FramesToCode(const uint16_t frame)
+{
+    const uint16_t clamped = (maxFramepoint < frame) ? maxFramepoint : frame;
+    return frameToCode[clamped];
+}
+
+// Encodes a sequence of frame counts into 8-bit codes, element by element.
+// Makes sure the lookup tables are initialized first.
+static
+vector<uint8_t> FramesToCode(const vector<uint16_t>& frames)
+{
+    InitIpdDownsampling();
+
+    vector<uint8_t> encoded;
+    encoded.reserve(frames.size());
+    for (const uint16_t frame : frames)
+        encoded.push_back(FramesToCode(frame));
+    return encoded;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// Creates an empty frame list.
+Frames::Frames(void)
+{
+}
+
+// Copies the given frame counts.
+Frames::Frames(const std::vector<uint16_t>& frames)
+    : data_(frames)
+{
+}
+
+// Takes over the given frame counts.
+Frames::Frames(std::vector<uint16_t>&& frames)
+    : data_(std::move(frames))
+{
+}
+
+Frames::Frames(const Frames& other)
+    : data_(other.data_)
+{
+}
+
+Frames::Frames(Frames&& other)
+    : data_(std::move(other.data_))
+{
+}
+
+Frames::~Frames(void)
+{
+}
+
+Frames& Frames::operator=(const Frames& other)
+{
+    data_ = other.data_;
+    return *this;
+}
+
+Frames& Frames::operator=(Frames&& other)
+{
+    data_ = std::move(other.data_);
+    return *this;
+}
+
+// Expands 8-bit codes into a Frames object holding 16-bit frame counts.
+Frames Frames::Decode(const std::vector<uint8_t>& codedData)
+{
+    std::vector<uint16_t> decoded = internal::CodeToFrames(codedData);
+    return Frames(std::move(decoded));
+}
+
+// Compresses 16-bit frame counts into their 8-bit codes.
+std::vector<uint8_t> Frames::Encode(const std::vector<uint16_t>& frames)
+{
+    return internal::FramesToCode(frames);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file GenomicInterval.cpp
+/// \brief Implements the GenomicInterval class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/GenomicInterval.h"
+#include "AssertUtils.h"
+#include "StringUtils.h"
+#include <cstdlib>
+#include <cstring>
+#include <ctype.h>
+#include <stdexcept>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Returns the sequence name and sets begin/end from the input region string.
+//
+// Accepted forms:
+//   "name"            -> begin = 0, end = 1<<29
+//   "name:begin-end"  -> begin/end parsed with std::stoi
+//
+// Throws std::runtime_error("malformed region string") when the string does
+// not split into one or two ':'-separated parts, or when the interval part
+// does not split into exactly two '-'-separated parts (e.g. "name:100").
+// std::stoi may additionally throw std::invalid_argument or
+// std::out_of_range for values it cannot convert.
+string parseRegionString(const string& reg,
+                         PacBio::BAM::Position* begin,
+                         PacBio::BAM::Position* end)
+{
+    const vector<string> parts = internal::Split(reg, ':');
+    if (parts.empty() || parts.size() > 2)
+        throw std::runtime_error("malformed region string");
+
+    // given name only, default min,max intervals
+    if (parts.size() == 1) {
+        *begin = 0;
+        *end = 1<<29;
+    }
+
+    // parse interval from input
+    else {
+        const vector<string> intervalParts = internal::Split(parts.at(1), '-');
+
+        // both endpoints are required; checking for exactly two pieces keeps
+        // the at(1) access below from surfacing as std::out_of_range
+        if (intervalParts.size() != 2)
+            throw std::runtime_error("malformed region string");
+        *begin = std::stoi(intervalParts.at(0));
+        *end = std::stoi(intervalParts.at(1));
+    }
+
+    return parts.at(0);
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// Default constructor: sets no member explicitly.
+GenomicInterval::GenomicInterval(void)
+{
+}
+
+// Builds an interval on sequence 'name' whose coordinates are (start, stop).
+GenomicInterval::GenomicInterval(const std::string& name,
+                                 const Position& start,
+                                 const Position& stop)
+    : name_(name),
+      interval_(start, stop)
+{
+}
+
+// Builds an interval from a samtools-style region string; parsing (and any
+// resulting exception) is delegated to internal::parseRegionString.
+GenomicInterval::GenomicInterval(const string& samtoolsRegionString)
+{
+    Position regionBegin;
+    Position regionEnd;
+    name_ = internal::parseRegionString(samtoolsRegionString,
+                                        &regionBegin,
+                                        &regionEnd);
+    interval_ = PacBio::BAM::Interval<Position>(regionBegin, regionEnd);
+}
+
+// Copy constructor: member-wise copy of the name and the interval.
+GenomicInterval::GenomicInterval(const GenomicInterval& other)
+    : name_(other.name_),
+      interval_(other.interval_)
+{
+}
+
+// Copy assignment: member-wise copy of the name and the interval.
+GenomicInterval& GenomicInterval::operator=(const GenomicInterval& other)
+{
+    this->name_ = other.name_;
+    this->interval_ = other.interval_;
+    return *this;
+}
+
+// True only if both intervals are on the same sequence name and this
+// interval is covered by 'other'.
+bool GenomicInterval::CoveredBy(const GenomicInterval& other) const
+{
+    const bool sameName = (name_ == other.name_);
+    return sameName && interval_.CoveredBy(other.interval_);
+}
+
+// True only if both intervals are on the same sequence name and this
+// interval covers 'other'.
+bool GenomicInterval::Covers(const GenomicInterval& other) const
+{
+    const bool sameName = (name_ == other.name_);
+    return sameName && interval_.Covers(other.interval_);
+}
+
+// True only if both intervals are on the same sequence name and their
+// coordinate intervals intersect.
+bool GenomicInterval::Intersects(const GenomicInterval& other) const
+{
+    const bool sameName = (name_ == other.name_);
+    return sameName && interval_.Intersects(other.interval_);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file GenomicIntervalQuery.cpp
+/// \brief Implements the GenomicIntervalQuery class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/GenomicIntervalQuery.h"
+#include "pbbam/CompositeBamReader.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Private implementation of GenomicIntervalQuery: owns the composite reader
+// that is constructed from the requested interval and dataset.
+struct GenomicIntervalQuery::GenomicIntervalQueryPrivate
+{
+    GenomicIntervalCompositeBamReader reader_;
+
+    GenomicIntervalQueryPrivate(const GenomicInterval& interval,
+                                const DataSet& dataset)
+        : reader_(interval, dataset)
+    {
+    }
+};
+
+// Creates the query and its private reader for 'interval' over 'dataset'.
+GenomicIntervalQuery::GenomicIntervalQuery(const GenomicInterval& interval,
+                                           const DataSet &dataset)
+    : internal::IQuery(),
+      d_(new GenomicIntervalQueryPrivate(interval, dataset))
+{
+}
+
+// Destructor: nothing is released explicitly here.
+GenomicIntervalQuery::~GenomicIntervalQuery(void)
+{
+}
+
+// Forwards to the underlying composite reader and returns its result.
+bool GenomicIntervalQuery::GetNext(BamRecord &r)
+{
+    const bool fetched = d_->reader_.GetNext(r);
+    return fetched;
+}
+
+// Sets a new interval on the underlying reader; returns *this for chaining.
+GenomicIntervalQuery& GenomicIntervalQuery::Interval(const GenomicInterval& interval)
+{
+    d_->reader_.Interval(interval);
+    return *this;
+}
+
+// Returns the interval currently held by the underlying reader.
+const GenomicInterval& GenomicIntervalQuery::Interval(void) const
+{
+    return d_->reader_.Interval();
+}
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file IRecordWriter.cpp
+/// \brief Implements the IRecordWriter class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/IRecordWriter.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+// Default constructor; intentionally empty.
+IRecordWriter::IRecordWriter(void)
+{
+}
+
+// Destructor; intentionally empty.
+IRecordWriter::~IRecordWriter(void)
+{
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file IndexedFastaReader.cpp
+/// \brief Implements the IndexedFastaReader class.
+//
+// Author: David Alexander
+
+#include "pbbam/IndexedFastaReader.h"
+
+#include "pbbam/BamRecord.h"
+#include "pbbam/GenomicInterval.h"
+#include "pbbam/Orientation.h"
+#include "SequenceUtils.h"
+#include <htslib/faidx.h>
+#include <iostream>
+#include <cstdlib>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Creates a reader and attempts to load the FASTA index for \p filename.
+///
+/// The result of Open() is not reported to the caller. If loading fails the
+/// reader is left in a "not loaded" state in which the accessors guarded by
+/// REQUIRE_FAIDX_LOADED throw.
+///
+IndexedFastaReader::IndexedFastaReader(const std::string& filename)
+    : handle_(nullptr)  // Open() only assigns handle_ on success; start from a
+                        // defined value so Close() in the destructor never
+                        // sees an indeterminate pointer
+{
+    Open(filename);
+}
+
+// Copy constructor: re-opens the file named by 'src' rather than sharing its
+// handle. Throws std::runtime_error if that file cannot be opened.
+IndexedFastaReader::IndexedFastaReader(const IndexedFastaReader& src)
+{
+    const bool opened = Open(src.filename_);
+    if (!opened)
+        throw std::runtime_error("Cannot open file " + src.filename_);
+}
+
+/// \brief Copy assignment: re-opens the file named by \p rhs.
+///
+/// If the file cannot be opened, this reader keeps its current state (as
+/// before, the failure is not reported). On success the previously held
+/// index handle is released; Open() itself only overwrites handle_, so
+/// without this the old handle would be leaked.
+///
+IndexedFastaReader& IndexedFastaReader::operator=(const IndexedFastaReader& rhs)
+{
+    if (&rhs == this)
+        return *this;
+
+    // remember the current handle: Open() replaces handle_ without freeing it
+    faidx_t* const previousHandle = handle_;
+    if (Open(rhs.filename_) && previousHandle != nullptr)
+        fai_destroy(previousHandle);
+    return *this;
+}
+
+// Destructor: releases the index handle (if any) via Close().
+IndexedFastaReader::~IndexedFastaReader(void)
+{
+    this->Close();
+}
+
+// Loads the FASTA index for 'filename' via fai_load().
+//
+// Returns false (leaving filename_ and handle_ untouched) if loading fails;
+// otherwise stores the new filename and handle and returns true. Note that a
+// handle already held by this reader is overwritten, not destroyed.
+bool IndexedFastaReader::Open(const std::string &filename)
+{
+    faidx_t* const loaded = fai_load(filename.c_str());
+    if (loaded == nullptr)
+        return false;
+
+    filename_ = filename;
+    handle_ = loaded;
+    return true;
+}
+
+// Clears the stored filename and destroys the index handle, if one is held.
+// Afterwards handle_ is nullptr.
+void IndexedFastaReader::Close(void)
+{
+    filename_.clear();
+    if (handle_ != nullptr) {
+        fai_destroy(handle_);
+    }
+    handle_ = nullptr;
+}
+
+// Guard placed at the top of the accessor methods below: throws a plain
+// std::exception when no index handle is loaded (handle_ == nullptr).
+#define REQUIRE_FAIDX_LOADED if (handle_ == nullptr) throw std::exception()
+
+// Fetches the bases of sequence 'id' for the range given by 'begin'/'end'.
+//
+// Throws std::exception if no index is loaded, and std::runtime_error if
+// htslib returns no sequence.
+std::string IndexedFastaReader::Subsequence(const std::string& id,
+                                            Position begin,
+                                            Position end) const
+{
+    REQUIRE_FAIDX_LOADED;
+
+    // NOTE(review): htslib appears to treat 'end' as inclusive in
+    // faidx_fetch_seq (but as exclusive in the region spec used by fai_fetch),
+    // hence 'end - 1' below. The original author asked for this to be verified.
+    int fetchedLength;
+    char* const fetched = faidx_fetch_seq(handle_, id.c_str(), begin, end - 1, &fetchedLength);
+    if (fetched == nullptr)
+        throw std::runtime_error("could not fetch FASTA sequence");
+
+    // copy into a std::string, then release the buffer returned by htslib
+    std::string result(fetched);
+    free(fetched);
+    return result;
+}
+
+// Convenience overload: fetches the bases described by 'interval' using its
+// name, start and stop.
+std::string IndexedFastaReader::Subsequence(const GenomicInterval& interval) const
+{
+    REQUIRE_FAIDX_LOADED;
+    return this->Subsequence(interval.Name(),
+                             interval.Start(),
+                             interval.Stop());
+}
+
+// Fetches the bases for an htslib region string, passed straight to fai_fetch().
+//
+// Throws std::exception if no index is loaded, and std::runtime_error if
+// htslib returns no sequence.
+std::string IndexedFastaReader::Subsequence(const char *htslibRegion) const
+{
+    REQUIRE_FAIDX_LOADED;
+
+    int fetchedLength;
+    char* const fetched = fai_fetch(handle_, htslibRegion, &fetchedLength);
+    if (fetched == nullptr)
+        throw std::runtime_error("could not fetch FASTA sequence");
+
+    // copy into a std::string, then release the buffer returned by htslib
+    std::string result(fetched);
+    free(fetched);
+    return result;
+}
+
+
+// Returns the reference bases spanned by 'bamRecord' (ReferenceStart to
+// ReferenceEnd on ReferenceName).
+//
+// If the record is mapped and 'gapped' is set, the CIGAR is walked and the
+// sequence is padded: '-' for insertions, '*' for padding operations, and '-'
+// for soft clips unless 'exciseSoftClips' is set. Hard clips are ignored.
+//
+// If 'orientation' is not GENOMIC and the record is on the reverse strand,
+// the result is reverse-complemented (case-sensitively) before being returned.
+std::string
+IndexedFastaReader::ReferenceSubsequence(const BamRecord& bamRecord,
+                                         const Orientation orientation,
+                                         const bool gapped,
+                                         const bool exciseSoftClips) const
+{
+    REQUIRE_FAIDX_LOADED;
+
+    std::string subseq = Subsequence(bamRecord.ReferenceName(),
+                                     bamRecord.ReferenceStart(),
+                                     bamRecord.ReferenceEnd());
+    const bool reverse = (orientation != Orientation::GENOMIC) &&
+                         bamRecord.Impl().IsReverseStrand();
+
+    if (bamRecord.Impl().IsMapped() && gapped) {
+
+        // position in 'subseq' at which the next gap/pad run would be inserted
+        size_t insertAt = 0;
+
+        const Cigar& cigar = bamRecord.Impl().CigarData();
+        Cigar::const_iterator opIter = cigar.cbegin();
+        const Cigar::const_iterator opEnd = cigar.cend();
+        for (; opIter != opEnd; ++opIter) {
+            const CigarOperation& op = (*opIter);
+            const CigarOperationType& type = op.Type();
+
+            // hard clips contribute nothing
+            if (type == CigarOperationType::HARD_CLIP)
+                continue;
+
+            const size_t opLength = op.Length();
+
+            // soft clips: pad with gaps unless they are being excised
+            if (type == CigarOperationType::SOFT_CLIP) {
+                if (!exciseSoftClips) {
+                    subseq.reserve(subseq.size() + opLength);
+                    subseq.insert(insertAt, opLength, '-');
+                    insertAt += opLength;
+                }
+                continue;
+            }
+
+            // non-clipping operations: maybe add gaps/padding ...
+            if (type == CigarOperationType::INSERTION) {
+                subseq.reserve(subseq.size() + opLength);
+                subseq.insert(insertAt, opLength, '-');
+            }
+            else if (type == CigarOperationType::PADDING) {
+                subseq.reserve(subseq.size() + opLength);
+                subseq.insert(insertAt, opLength, '*');
+            }
+
+            // ... and always advance past this operation
+            insertAt += opLength;
+        }
+    }
+
+    if (reverse)
+        internal::ReverseComplementCaseSens(subseq);
+
+    return subseq;
+}
+
+
+// Returns the number of sequences reported by the loaded index (faidx_nseq).
+// Throws std::exception if no index is loaded.
+int IndexedFastaReader::NumSequences(void) const
+{
+    REQUIRE_FAIDX_LOADED;
+    const int sequenceCount = faidx_nseq(handle_);
+    return sequenceCount;
+}
+
+// Returns true if faidx_has_seq() reports a non-zero value for 'name'.
+// Throws std::exception if no index is loaded.
+bool IndexedFastaReader::HasSequence(const std::string& name) const
+{
+    REQUIRE_FAIDX_LOADED;
+    const int found = faidx_has_seq(handle_, name.c_str());
+    return found != 0;
+}
+
+// Returns the length of sequence 'name' as reported by faidx_seq_len().
+// Throws std::exception if no index is loaded, and std::runtime_error if
+// htslib reports a negative length.
+int IndexedFastaReader::SequenceLength(const std::string& name) const
+{
+    REQUIRE_FAIDX_LOADED;
+
+    const int sequenceLength = faidx_seq_len(handle_, name.c_str());
+    if (sequenceLength < 0)
+        throw std::runtime_error("could not determine FASTA sequence length");
+    return sequenceLength;
+}
+
+}} // PacBio::BAM
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file MD5.cpp
+/// \brief Implements basic MD5 hash utilities
+//
+// Author: Brett Bowman
+
+#include "pbbam/MD5.h"
+#include <cram/md5.h>
+
+namespace PacBio {
+namespace BAM {
+
+/// \brief Computes the MD5 digest of \p str.
+///
+/// \returns the 16-byte digest rendered as 32 lowercase hexadecimal characters
+///
+std::string MD5Hash(const std::string& str)
+{
+    static const char hexDigits[] = "0123456789abcdef";
+
+    MD5_CTX context;
+    unsigned char digest[16];
+
+    MD5_Init(&context);
+    MD5_Update(&context, reinterpret_cast<void*>(const_cast<char*>(str.c_str())), str.size());
+    MD5_Final(digest, &context);
+
+    // two hex characters per digest byte, high nibble first
+    std::string hexDigest;
+    hexDigest.reserve(32);
+    for (int i = 0; i < 16; ++i) {
+        hexDigest.push_back(hexDigits[digest[i] >> 4]);
+        hexDigest.push_back(hexDigits[digest[i] & 0x0F]);
+    }
+    return hexDigest;
+}
+
+} // namespace BAM
+} // namespace PacBio
+
+
+
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "MemoryUtils.h"
+#include <string>
+#include <cstdlib>
+#include <cstring>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// -----------------
+// BamHeaderMemory
+// -----------------
+
+// Converts a raw htslib header into a BamHeader.
+//
+// A null 'hdr' is an error (std::runtime_error). A header with no text yields
+// a default-constructed BamHeader; otherwise the BamHeader is built from the
+// header's SAM text.
+BamHeader BamHeaderMemory::FromRawData(bam_hdr_t* hdr)
+{
+    if (hdr == nullptr)
+        throw std::runtime_error("invalid BAM header");
+
+    const bool hasText = (hdr->text != nullptr) && (hdr->l_text != 0);
+    if (!hasText)
+        return BamHeader();
+
+    return BamHeader(string(hdr->text, hdr->l_text));
+}
+
+/// \brief Builds a raw htslib header from \p header.
+///
+/// The header is serialized with ToSam(), parsed by sam_hdr_parse(), and a
+/// NUL-terminated copy of the SAM text is attached to the result. The returned
+/// pointer destroys the header through HtslibHeaderDeleter.
+///
+/// \throws std::runtime_error if htslib cannot parse the header text or if
+///         the text buffer cannot be allocated (previously both cases
+///         dereferenced a null pointer)
+///
+PBBAM_SHARED_PTR<bam_hdr_t> BamHeaderMemory::MakeRawHeader(const BamHeader& header)
+{
+    const string& text = header.ToSam();
+    PBBAM_SHARED_PTR<bam_hdr_t> rawData(sam_hdr_parse(text.size(), text.c_str()),
+                                        internal::HtslibHeaderDeleter());
+    if (rawData.get() == nullptr)
+        throw std::runtime_error("could not parse BAM header text");
+
+    // allocate the text copy first, so the header is only modified on success;
+    // calloc zero-fills, which provides the trailing NUL
+    char* const textCopy = static_cast<char*>(calloc(text.size() + 1, 1));
+    if (textCopy == nullptr)
+        throw std::runtime_error("could not allocate BAM header text");
+    memcpy(textCopy, text.c_str(), text.size());
+
+    rawData->ignore_sam_err = 0;
+    rawData->cigar_tab = nullptr;
+    rawData->l_text = text.size();
+    rawData->text = textCopy;
+    return rawData;
+}
+
+//PBBAM_SHARED_PTR<bam_hdr_t> BamHeaderMemory::MakeRawHeader(const BamHeader& header)
+//{
+// if (!header)
+// return PBBAM_SHARED_PTR<bam_hdr_t>(nullptr);
+// return MakeRawHeader(*header.get());
+//}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef MEMORYUTILS_H
+#define MEMORYUTILS_H
+
+#include "pbbam/Config.h"
+#include "pbbam/BamHeader.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/BamRecordImpl.h"
+#include <htslib/bgzf.h>
+#include <htslib/sam.h>
+#include <memory>
+
+namespace PacBio {
+namespace BAM {
+
+class BamHeader;
+
+namespace internal {
+
+// intended for use with PBBAM_SHARED_PTR<T>, std::unique_ptr<T>, etc
+
+// Deleter for BGZF handles: calls bgzf_close() on non-null pointers.
+struct HtslibBgzfDeleter
+{
+    void operator()(BGZF* bgzf)
+    {
+        if (bgzf == nullptr)
+            return;
+        bgzf_close(bgzf);
+    }
+};
+
+// Deleter for samFile handles: calls sam_close() on non-null pointers.
+struct HtslibFileDeleter
+{
+    void operator()(samFile* file)
+    {
+        if (file == nullptr)
+            return;
+        sam_close(file);
+    }
+};
+
+// Deleter for bam_hdr_t: calls bam_hdr_destroy() on non-null pointers.
+struct HtslibHeaderDeleter
+{
+    void operator()(bam_hdr_t* hdr)
+    {
+        if (hdr == nullptr)
+            return;
+        bam_hdr_destroy(hdr);
+    }
+};
+
+// Deleter for hts_idx_t: calls hts_idx_destroy() on non-null pointers.
+struct HtslibIndexDeleter
+{
+    void operator()(hts_idx_t* index)
+    {
+        if (index == nullptr)
+            return;
+        hts_idx_destroy(index);
+    }
+};
+
+// Deleter for hts_itr_t: calls hts_itr_destroy() on non-null pointers.
+struct HtslibIteratorDeleter
+{
+    void operator()(hts_itr_t* iter)
+    {
+        if (iter == nullptr)
+            return;
+        hts_itr_destroy(iter);
+    }
+};
+
+// Deleter for bam1_t records: calls bam_destroy1() on non-null pointers.
+struct HtslibRecordDeleter
+{
+    void operator()(bam1_t* b)
+    {
+        if (b == nullptr)
+            return;
+        bam_destroy1(b);
+    }
+};
+
+// Static helpers for converting between BamHeader and the raw htslib
+// header type (bam_hdr_t).
+class BamHeaderMemory
+{
+public:
+ // builds a BamHeader from a raw htslib header
+ static BamHeader FromRawData(bam_hdr_t* header);
+ // builds a raw htslib header (shared ownership) from a BamHeader
+ static PBBAM_SHARED_PTR<bam_hdr_t> MakeRawHeader(const BamHeader& header);
+// static PBBAM_SHARED_PTR<bam_hdr_t> MakeRawHeader(const BamHeader& header);
+};
+
+// Static helpers that expose the internals of BamRecord / BamRecordImpl
+// (the implementation object and the raw bam1_t data) to library code.
+class BamRecordMemory
+{
+public:
+ // access the BamRecordImpl held by a BamRecord
+ static const BamRecordImpl& GetImpl(const BamRecord& r);
+ static const BamRecordImpl& GetImpl(const BamRecord* r);
+ // access the shared raw bam1_t held by a record or its implementation
+ static PBBAM_SHARED_PTR<bam1_t> GetRawData(const BamRecord& r);
+ static PBBAM_SHARED_PTR<bam1_t> GetRawData(const BamRecord* r);
+ static PBBAM_SHARED_PTR<bam1_t> GetRawData(const BamRecordImpl& impl);
+ static PBBAM_SHARED_PTR<bam1_t> GetRawData(const BamRecordImpl* impl);
+
+ // trigger BamRecordImpl::UpdateTagMap() on the record's implementation
+ static void UpdateRecordTags(const BamRecord& r);
+ static void UpdateRecordTags(const BamRecordImpl& r);
+};
+
+// Returns the implementation object held by 'r'.
+inline const BamRecordImpl& BamRecordMemory::GetImpl(const BamRecord& r)
+{
+    return r.impl_;
+}
+
+// Pointer overload; 'r' is dereferenced without a null check.
+inline const BamRecordImpl& BamRecordMemory::GetImpl(const BamRecord* r)
+{
+    return GetImpl(*r);
+}
+
+// Returns the raw bam1_t pointer held by the implementation object of 'r'.
+inline PBBAM_SHARED_PTR<bam1_t> BamRecordMemory::GetRawData(const BamRecord& r)
+{
+    return r.impl_.d_;
+}
+
+// Pointer overload; 'r' is dereferenced without a null check.
+inline PBBAM_SHARED_PTR<bam1_t> BamRecordMemory::GetRawData(const BamRecord* r)
+{
+    return GetRawData(*r);
+}
+
+// Returns the raw bam1_t pointer held by 'impl'.
+inline PBBAM_SHARED_PTR<bam1_t> BamRecordMemory::GetRawData(const BamRecordImpl& impl)
+{
+    return impl.d_;
+}
+
+// Pointer overload; 'impl' is dereferenced without a null check.
+inline PBBAM_SHARED_PTR<bam1_t> BamRecordMemory::GetRawData(const BamRecordImpl* impl)
+{
+    return GetRawData(*impl);
+}
+
+// Calls UpdateTagMap() on the implementation object held by 'r'.
+inline void BamRecordMemory::UpdateRecordTags(const BamRecord& r)
+{
+    r.impl_.UpdateTagMap();
+}
+
+// Calls UpdateTagMap() on 'r'.
+inline void BamRecordMemory::UpdateRecordTags(const BamRecordImpl& r)
+{
+    r.UpdateTagMap();
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // MEMORYUTILS_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiBuilder.cpp
+/// \brief Implements the PbiBuilder class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiBuilder.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/PbiRawData.h"
+#include "FileProducer.h"
+#include "MemoryUtils.h"
+#include "PbiIndexIO.h"
+#include <htslib/bgzf.h>
+#include <thread>
+#include <cassert>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// -------------------------------------------
+// PbiRawReferenceDataBuilder implementation
+// -------------------------------------------
+
+// helper for reference data
+//
+// Tracks, per reference ID, the first row and one-past-the-last row of the
+// records seen so far, and detects records that arrive out of coordinate order.
+class PbiRawReferenceDataBuilder
+{
+public:
+ // pre-creates an entry for each of the expected references, plus an
+ // "unmapped" entry
+ PbiRawReferenceDataBuilder(const size_t numReferenceSequences);
+
+public:
+ // records 'rowNumber' for the record's reference; returns false if the
+ // record is out of order with respect to the records seen before it
+ bool AddRecord(const BamRecord& record,
+ const PbiReferenceEntry::Row rowNumber);
+ // returns the collected entries, in map (key) order
+ PbiRawReferenceData Result(void) const;
+
+private:
+ int32_t lastRefId_; // reference ID of the previous record (-1 initially)
+ Position lastPos_; // reference start of the previous record (-1 initially)
+ map<uint32_t, PbiReferenceEntry> rawReferenceEntries_; // keyed by reference ID cast to uint32_t
+};
+
+// Seeds the entry map with one entry per expected reference, so that known
+// references are present even if no record ever maps to them (more can be
+// added later), and adds the entry used for unmapped records.
+PbiRawReferenceDataBuilder::PbiRawReferenceDataBuilder(const size_t numReferenceSequences)
+    : lastRefId_(-1),
+      lastPos_(-1)
+{
+    for (size_t refId = 0; refId != numReferenceSequences; ++refId) {
+        rawReferenceEntries_[refId] = PbiReferenceEntry(refId);
+    }
+
+    // the "unmapped" entry
+    rawReferenceEntries_[PbiReferenceEntry::UNMAPPED_ID] = PbiReferenceEntry();
+}
+
+// Registers 'record' (stored at row 'rowNumber') with its reference entry.
+//
+// Returns false, without updating any row numbers, if the record shows that
+// the input is not coordinate-sorted: a mapped record after unmapped ones, a
+// reference ID that was already seen earlier, or a position lower than the
+// previous one on the same reference. Returns true otherwise.
+bool PbiRawReferenceDataBuilder::AddRecord(const BamRecord& record,
+                                           const PbiReferenceEntry::Row rowNumber)
+{
+    // reference ID & position of this record
+    const int32_t tId = record.ReferenceId();
+    const int32_t pos = record.ReferenceStart();
+
+    // sanity checks to protect against non-coordinate-sorted BAMs
+    const bool referenceChanged = (lastRefId_ != tId) || (lastRefId_ >= 0 && tId < 0);
+    if (!referenceChanged) {
+        // same reference as before: positions must not go backwards
+        if (tId >= 0 && lastPos_ > pos)
+            return false;
+    }
+    else {
+        if (tId >= 0) {
+
+            // unmapped reads should all be at the end; seeing a valid tId
+            // after unmapped reads is an error
+            const PbiReferenceEntry& unmappedEntry =
+                rawReferenceEntries_[PbiReferenceEntry::UNMAPPED_ID];
+            if (unmappedEntry.beginRow_ != PbiReferenceEntry::UNSET_ROW)
+                return false;
+
+            // we arrived here from a different tId, so having already seen
+            // data for this one means the references are out of order
+            const PbiReferenceEntry& currentEntry =
+                rawReferenceEntries_[static_cast<uint32_t>(tId)];
+            if (currentEntry.beginRow_ != PbiReferenceEntry::UNSET_ROW)
+                return false;
+        }
+        lastRefId_ = tId;
+    }
+
+    // update row numbers: first row is set once, end row always moves forward
+    PbiReferenceEntry& target = rawReferenceEntries_[static_cast<uint32_t>(tId)];
+    if (target.beginRow_ == PbiReferenceEntry::UNSET_ROW)
+        target.beginRow_ = rowNumber;
+    target.endRow_ = rowNumber + 1;
+
+    // remember position for the sorting check on the next record
+    lastPos_ = pos;
+    return true;
+}
+
+// Copies the collected entries into a PbiRawReferenceData.
+//
+// Entries come out in key order because they are stored in a std::map; the
+// original author notes that the unmapped entry lands at the end because its
+// key is the unsigned cast of -1.
+PbiRawReferenceData PbiRawReferenceDataBuilder::Result(void) const
+{
+    PbiRawReferenceData result;
+    result.entries_.reserve(rawReferenceEntries_.size());
+    for (const auto& keyAndEntry : rawReferenceEntries_)
+        result.entries_.push_back(keyAndEntry.second);
+    return result;
+}
+
+// ----------------------------------
+// PbiBuilderPrivate implementation
+// ----------------------------------
+
+// Private implementation of PbiBuilder: accumulates per-record index data and
+// writes the PBI file contents from its destructor.
+class PbiBuilderPrivate : public internal::FileProducer
+{
+public:
+ // opens the output for writing; reference data is tracked when
+ // numReferenceSequences > 0
+ PbiBuilderPrivate(const string& filename,
+ const size_t numReferenceSequences,
+ const PbiBuilder::CompressionLevel compressionLevel,
+ const size_t numThreads);
+ // as above, but reference data is tracked only if isCoordinateSorted is
+ // also true
+ PbiBuilderPrivate(const string& filename,
+ const size_t numReferenceSequences,
+ const bool isCoordinateSorted,
+ const PbiBuilder::CompressionLevel compressionLevel,
+ const size_t numThreads);
+ // writes the header and the collected index sections to the file
+ ~PbiBuilderPrivate(void);
+
+public:
+ // adds one record (located at virtual offset vOffset) to the index data
+ void AddRecord(const BamRecord& record, const int64_t vOffset);
+
+public:
+ bool HasBarcodeData(void) const;
+ bool HasMappedData(void) const;
+ bool HasReferenceData(void) const;
+
+public:
+ unique_ptr<BGZF, HtslibBgzfDeleter> bgzf_; // output file handle
+ PbiRawData rawData_; // index data collected so far
+ PbiReferenceEntry::Row currentRow_; // row number of the next record
+ unique_ptr<PbiRawReferenceDataBuilder> refDataBuilder_; // null when reference data is not tracked
+};
+
+// Four-argument constructor: identical to the five-argument form with
+// isCoordinateSorted = true, i.e. reference data is tracked whenever
+// numReferenceSequences > 0. All setup work is done by the delegated-to
+// constructor.
+PbiBuilderPrivate::PbiBuilderPrivate(const string& filename,
+                                     const size_t numReferenceSequences,
+                                     const PbiBuilder::CompressionLevel compressionLevel,
+                                     const size_t numThreads)
+    : PbiBuilderPrivate(filename,
+                        numReferenceSequences,
+                        true,
+                        compressionLevel,
+                        numThreads)
+{
+}
+
+// Opens the (temporary) output file for BGZF writing at the requested
+// compression level, optionally enables multithreaded compression, and sets
+// up reference-data tracking when the input is declared coordinate-sorted and
+// has at least one reference sequence.
+//
+// numThreads == 0 means "use the hardware concurrency"; if that is unknown
+// too, a single thread is used.
+//
+// Throws std::runtime_error if the file cannot be opened.
+PbiBuilderPrivate::PbiBuilderPrivate(const string& filename,
+                                     const size_t numReferenceSequences,
+                                     const bool isCoordinateSorted,
+                                     const PbiBuilder::CompressionLevel compressionLevel,
+                                     const size_t numThreads)
+    : internal::FileProducer(filename),
+      bgzf_(nullptr),
+      currentRow_(0),
+      refDataBuilder_(nullptr)
+{
+    // mode string is "wb" followed by the numeric compression level
+    const string mode = "wb" + to_string(static_cast<int>(compressionLevel));
+    bgzf_.reset(bgzf_open(TempFilename().c_str(), mode.c_str()));
+    if (bgzf_.get() == 0)
+        throw std::runtime_error("could not open PBI file for writing");
+
+    // resolve the thread count: explicit value, else hardware, else 1
+    size_t threadCount = numThreads;
+    if (threadCount == 0)
+        threadCount = thread::hardware_concurrency();
+    if (threadCount == 0)
+        threadCount = 1;
+
+    // if multithreading requested, enable it
+    if (threadCount > 1)
+        bgzf_mt(bgzf_.get(), threadCount, 256);
+
+    const bool trackReferences = isCoordinateSorted && (numReferenceSequences > 0);
+    if (trackReferences)
+        refDataBuilder_.reset(new PbiRawReferenceDataBuilder(numReferenceSequences));
+}
+
+// Finalizes the index and writes it out: stores the read count, works out
+// which optional sections are present, then writes the header followed (when
+// there is at least one read) by the basic, mapped, reference and barcode
+// sections, in that order.
+PbiBuilderPrivate::~PbiBuilderPrivate(void)
+{
+    rawData_.NumReads(currentRow_);
+
+    const bool withBarcodes = HasBarcodeData();
+    const bool withMapping = HasMappedData();
+    const bool withReferences = HasReferenceData();
+
+    // fetch reference data, if available
+    if (withReferences) {
+        assert(refDataBuilder_);
+        rawData_.ReferenceData() = refDataBuilder_->Result();
+    }
+
+    // determine section flags
+    PbiFile::Sections sections = PbiFile::BASIC;
+    if (withMapping)
+        sections |= PbiFile::MAPPED;
+    if (withBarcodes)
+        sections |= PbiFile::BARCODE;
+    if (withReferences)
+        sections |= PbiFile::REFERENCE;
+    rawData_.FileSections(sections);
+
+    // write index contents to file; the header is always written
+    BGZF* fp = bgzf_.get();
+    PbiIndexIO::WriteHeader(rawData_, fp);
+
+    const uint32_t numReads = rawData_.NumReads();
+    if (numReads == 0)
+        return;
+
+    PbiIndexIO::WriteBasicData(rawData_.BasicData(), numReads, fp);
+    if (withMapping)
+        PbiIndexIO::WriteMappedData(rawData_.MappedData(), numReads, fp);
+    if (withReferences)
+        PbiIndexIO::WriteReferenceData(rawData_.ReferenceData(), fp);
+    if (withBarcodes)
+        PbiIndexIO::WriteBarcodeData(rawData_.BarcodeData(), numReads, fp);
+}
+
+// Appends one record's index data to every raw-data section and advances the
+// row counter.
+//
+// \param[in] record   record to index
+// \param[in] vOffset  offset stored alongside the record's basic data
+void PbiBuilderPrivate::AddRecord(const BamRecord& record, const int64_t vOffset)
+{
+    // make sure cached positions are recalculated before values are read
+    record.ResetCachedPositions();
+
+    // every section receives the record
+    rawData_.BarcodeData().AddRecord(record);
+    rawData_.BasicData().AddRecord(record, vOffset);
+    rawData_.MappedData().AddRecord(record);
+
+    // The reference builder reports false on an out-of-order record; once that
+    // happens coordinate-sorted reference data is dropped for good.
+    if (refDataBuilder_ && !refDataBuilder_->AddRecord(record, currentRow_))
+        refDataBuilder_.reset();
+
+    ++currentRow_;
+}
+
+// Reports whether any record carries barcode information.
+//
+// \returns true if at least one row has a forward barcode, reverse barcode or
+//          barcode quality other than -1; false otherwise
+//
+// \throws std::runtime_error if the three barcode containers differ in size
+bool PbiBuilderPrivate::HasBarcodeData(void) const
+{
+    // fetch data components
+    const auto& barcodeData = rawData_.BarcodeData();
+    const auto& bcForward = barcodeData.bcForward_;
+    const auto& bcReverse = barcodeData.bcReverse_;
+    const auto& bcQuality = barcodeData.bcQual_;
+
+    // ensure valid sizes: all three containers must agree, so a mismatch in
+    // *either* comparison is an inconsistency
+    if (bcForward.size() != bcReverse.size() ||
+        bcForward.size() != bcQuality.size())
+    {
+        auto msg = string{ "error: inconsistency in PBI barcode data:\n" };
+        msg += string{ " bcForward has " } + to_string(bcForward.size()) + string{ " elements\n" };
+        msg += string{ " bcReverse has " } + to_string(bcReverse.size()) + string{ " elements\n" };
+        msg += string{ " bcQuality has " } + to_string(bcQuality.size()) + string{ " elements\n" };
+        msg += string{ "\n" };
+        msg += string{ " these containers should contain equal number of elements.\n" };
+        throw std::runtime_error(msg);
+    }
+    assert(bcForward.size() == rawData_.NumReads());
+
+    // check for data: -1 is the "no value" marker in all three columns
+    for (uint32_t i = 0; i < rawData_.NumReads(); ++i) {
+        if (bcForward.at(i) != -1 ||
+            bcReverse.at(i) != -1 ||
+            bcQuality.at(i) != -1 )
+        {
+            return true;
+        }
+    }
+    // no actual data found
+    return false;
+}
+
+// Reports whether at least one record has a non-negative reference ID.
+//
+// \returns true if any tId is >= 0; false if every read is unmapped
+bool PbiBuilderPrivate::HasMappedData(void) const
+{
+    const auto& referenceIds = rawData_.MappedData().tId_;
+    assert(referenceIds.size() == rawData_.NumReads());
+
+    // stop at the first mapped read
+    const auto last = referenceIds.end();
+    for (auto iter = referenceIds.begin(); iter != last; ++iter) {
+        if (*iter >= 0)
+            return true;
+    }
+
+    // all reads unmapped
+    return false;
+}
+
+// Reference data is available exactly when the reference builder is still set.
+bool PbiBuilderPrivate::HasReferenceData(void) const
+{
+    return static_cast<bool>(refDataBuilder_);
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// ---------------------------
+// PbiBuilder implementation
+// ---------------------------
+
+// Creates a builder without reference information: 0 is passed as the number
+// of reference sequences to the private implementation.
+//
+// \param[in] pbiFilename       output PBI filename
+// \param[in] compressionLevel  forwarded to the private implementation
+// \param[in] numThreads        forwarded to the private implementation
+PbiBuilder::PbiBuilder(const string& pbiFilename,
+ const CompressionLevel compressionLevel,
+ const size_t numThreads)
+ : d_(new internal::PbiBuilderPrivate(pbiFilename,
+ 0,
+ compressionLevel,
+ numThreads))
+{ }
+
+// Creates a builder for input with a known number of reference sequences.
+// All arguments are forwarded unchanged to the private implementation.
+//
+// \param[in] pbiFilename            output PBI filename
+// \param[in] numReferenceSequences  number of reference sequences
+// \param[in] compressionLevel       forwarded to the private implementation
+// \param[in] numThreads             forwarded to the private implementation
+PbiBuilder::PbiBuilder(const string& pbiFilename,
+ const size_t numReferenceSequences,
+ const CompressionLevel compressionLevel,
+ const size_t numThreads)
+ : d_(new internal::PbiBuilderPrivate(pbiFilename,
+ numReferenceSequences,
+ compressionLevel,
+ numThreads))
+{ }
+
+// Creates a builder with an explicit coordinate-sorted flag. The private
+// implementation only collects reference data when isCoordinateSorted is true
+// and numReferenceSequences is greater than zero.
+//
+// \param[in] pbiFilename            output PBI filename
+// \param[in] numReferenceSequences  number of reference sequences
+// \param[in] isCoordinateSorted     whether the input is coordinate-sorted
+// \param[in] compressionLevel       forwarded to the private implementation
+// \param[in] numThreads             forwarded to the private implementation
+PbiBuilder::PbiBuilder(const string& pbiFilename,
+ const size_t numReferenceSequences,
+ const bool isCoordinateSorted,
+ const CompressionLevel compressionLevel,
+ const size_t numThreads)
+ : d_(new internal::PbiBuilderPrivate(pbiFilename,
+ numReferenceSequences,
+ isCoordinateSorted,
+ compressionLevel,
+ numThreads))
+{ }
+
+// Empty body; presumably defined out-of-line so d_ is destroyed where
+// PbiBuilderPrivate is a complete type - confirm against the header.
+PbiBuilder::~PbiBuilder(void) { }
+
+// Adds one record to the index being built.
+//
+// \param[in] record   record to index
+// \param[in] vOffset  offset to store for the record
+void PbiBuilder::AddRecord(const BamRecord& record, const int64_t vOffset)
+{
+ // tags are updated first, presumably so the indexed values are current - TODO confirm
+ internal::BamRecordMemory::UpdateRecordTags(record);
+ d_->AddRecord(record, vOffset);
+}
+
+// \returns a const reference to the raw index data accumulated so far
+const PbiRawData& PbiBuilder::Index(void) const
+{ return d_->rawData_; }
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFile.cpp
+/// \brief Implements the PbiFile methods.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiFile.h"
+#include "pbbam/BamFile.h"
+#include "pbbam/PbiBuilder.h"
+#include "pbbam/BamReader.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::PbiFile;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace PbiFile {
+
+// Builds a PBI index for an existing BAM file by reading every record.
+//
+// The index is written to bamFile.PacBioIndexFilename(); the number of
+// reference sequences is taken from the BAM header.
+//
+// \param[in] bamFile           BAM file to index
+// \param[in] compressionLevel  forwarded to PbiBuilder
+// \param[in] numThreads        forwarded to PbiBuilder
+void CreateFrom(const BamFile& bamFile,
+                const PbiBuilder::CompressionLevel compressionLevel,
+                const size_t numThreads)
+{
+    PbiBuilder builder(bamFile.PacBioIndexFilename(),
+                       bamFile.Header().Sequences().size(),
+                       compressionLevel,
+                       numThreads);
+    BamReader reader(bamFile);
+    BamRecord record;
+
+    // the offset is sampled *before* each read, so it is the record's start
+    for (int64_t recordOffset = reader.VirtualTell();
+         reader.GetNext(record);
+         recordOffset = reader.VirtualTell())
+    {
+        builder.AddRecord(record, recordOffset);
+    }
+}
+
+} // namespace PbiFile
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.\r
+//\r
+// All rights reserved.\r
+//\r
+// Redistribution and use in source and binary forms, with or without\r
+// modification, are permitted (subject to the limitations in the\r
+// disclaimer below) provided that the following conditions are met:\r
+//\r
+// * Redistributions of source code must retain the above copyright\r
+// notice, this list of conditions and the following disclaimer.\r
+//\r
+// * Redistributions in binary form must reproduce the above\r
+// copyright notice, this list of conditions and the following\r
+// disclaimer in the documentation and/or other materials provided\r
+// with the distribution.\r
+//\r
+// * Neither the name of Pacific Biosciences nor the names of its\r
+// contributors may be used to endorse or promote products derived\r
+// from this software without specific prior written permission.\r
+//\r
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE\r
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC\r
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED\r
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\r
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\r
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS\r
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\r
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF\r
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\r
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\r
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT\r
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
+// SUCH DAMAGE.\r
+//\r
+// File Description\r
+/// \file PbiFilter.cpp\r
+/// \brief Implements the PbiFilter class.\r
+//\r
+// Author: Derek Barnett\r
+\r
+#include "pbbam/PbiFilter.h"\r
+#include "pbbam/PbiFilterTypes.h"\r
+#include "StringUtils.h"\r
+#include <boost/algorithm/string/case_conv.hpp>\r
+#include <boost/algorithm/string/trim.hpp>\r
+#include <boost/numeric/conversion/cast.hpp>\r
+#include <algorithm>\r
+#include <fstream>\r
+#include <iostream>\r
+#include <sstream>\r
+#include <string>\r
+#include <unordered_map>\r
+#include <cctype>\r
+using namespace PacBio;\r
+using namespace PacBio::BAM;\r
+using namespace PacBio::BAM::internal;\r
+using namespace std;\r
+\r
+namespace PacBio {\r
+namespace BAM {\r
+namespace internal {\r
+\r
+// Codes for the built-in filter types; FromDataSetProperty() switches on these.\r
+enum class BuiltIn\r
+{\r
+    AlignedEndFilter,\r
+    AlignedLengthFilter,\r
+    AlignedStartFilter,\r
+    AlignedStrandFilter,\r
+    BarcodeFilter,\r
+    BarcodeForwardFilter,\r
+    BarcodeQualityFilter,\r
+    BarcodeReverseFilter,\r
+    BarcodesFilter,\r
+    IdentityFilter,\r
+    LocalContextFilter,\r
+    MovieNameFilter,\r
+    NumDeletedBasesFilter,\r
+    NumInsertedBasesFilter,\r
+    NumMatchesFilter,\r
+    NumMismatchesFilter,\r
+    QueryEndFilter,\r
+    QueryLengthFilter,\r
+    QueryNameFilter,\r
+    QueryNamesFromFileFilter,\r
+    QueryStartFilter,\r
+    ReadAccuracyFilter,\r
+    ReadGroupFilter,\r
+    ReferenceEndFilter,\r
+    ReferenceIdFilter,\r
+    ReferenceNameFilter,\r
+    ReferenceStartFilter,\r
+    ZmwFilter\r
+};\r
+\r
+// Maps a DataSet property name to its built-in filter code. Several names may\r
+// map to the same filter. FromDataSetProperty() lower-cases the property name\r
+// before the lookup, so every key here is lower-case.\r
+static const unordered_map<string, BuiltIn> builtInLookup =\r
+{\r
+ // property name built-in filter\r
+ { "ae", BuiltIn::AlignedEndFilter },\r
+ { "aend", BuiltIn::AlignedEndFilter },\r
+ { "alignedlength", BuiltIn::AlignedLengthFilter },\r
+ { "as", BuiltIn::AlignedStartFilter },\r
+ { "astart", BuiltIn::AlignedStartFilter },\r
+ { "readstart", BuiltIn::AlignedStartFilter },\r
+ { "bc", BuiltIn::BarcodeFilter },\r
+ { "barcode", BuiltIn::BarcodeFilter },\r
+ { "bcf", BuiltIn::BarcodeForwardFilter },\r
+ { "bq", BuiltIn::BarcodeQualityFilter },\r
+ { "bcq", BuiltIn::BarcodeQualityFilter },\r
+ { "bcr", BuiltIn::BarcodeReverseFilter },\r
+ { "accuracy", BuiltIn::IdentityFilter },\r
+ { "identity", BuiltIn::IdentityFilter },\r
+ { "cx", BuiltIn::LocalContextFilter },\r
+ { "movie", BuiltIn::MovieNameFilter },\r
+ { "qe", BuiltIn::QueryEndFilter },\r
+ { "qend", BuiltIn::QueryEndFilter },\r
+ { "length", BuiltIn::QueryLengthFilter },\r
+ { "querylength", BuiltIn::QueryLengthFilter },\r
+ { "qname", BuiltIn::QueryNameFilter },\r
+ { "qname_file", BuiltIn::QueryNamesFromFileFilter },\r
+ { "qs", BuiltIn::QueryStartFilter },\r
+ { "qstart", BuiltIn::QueryStartFilter },\r
+ { "rq", BuiltIn::ReadAccuracyFilter },\r
+ { "te", BuiltIn::ReferenceEndFilter },\r
+ { "tend", BuiltIn::ReferenceEndFilter },\r
+ { "rname", BuiltIn::ReferenceNameFilter },\r
+ { "ts", BuiltIn::ReferenceStartFilter },\r
+ { "tstart", BuiltIn::ReferenceStartFilter },\r
+ { "pos", BuiltIn::ReferenceStartFilter },\r
+ { "zm", BuiltIn::ZmwFilter },\r
+ { "zmw", BuiltIn::ZmwFilter }\r
+};\r
+\r
+// Maps the textual name of a local-context flag to its enum value. Used by\r
+// CreateLocalContextFilter() when the property value is given as flag names\r
+// rather than a raw integer. Keys are matched exactly (upper-case).\r
+static const unordered_map<string, LocalContextFlags> contextFlagNames =\r
+{\r
+ { "NO_LOCAL_CONTEXT", LocalContextFlags::NO_LOCAL_CONTEXT },\r
+ { "ADAPTER_BEFORE", LocalContextFlags::ADAPTER_BEFORE },\r
+ { "ADAPTER_AFTER", LocalContextFlags::ADAPTER_AFTER },\r
+ { "BARCODE_BEFORE", LocalContextFlags::BARCODE_BEFORE },\r
+ { "BARCODE_AFTER", LocalContextFlags::BARCODE_AFTER },\r
+ { "FORWARD_PASS", LocalContextFlags::FORWARD_PASS },\r
+ { "REVERSE_PASS", LocalContextFlags::REVERSE_PASS }\r
+};\r
+\r
+// helper methods (for handling maybe-list strings)\r
+// Reports whether 'value' begins with one of "[({" and ends with one of "])}".\r
+// The opening and closing characters need not be a matching pair.\r
+// An empty string is never bracketed.\r
+static inline bool isBracketed(const string& value)\r
+{\r
+    static const string openBrackets = "[({";\r
+    static const string closeBrackets = "])}";\r
+\r
+    // guard: there is no first/last character to inspect in an empty string\r
+    if (value.empty())\r
+        return false;\r
+\r
+    return openBrackets.find(value.front()) != string::npos &&\r
+           closeBrackets.find(value.back()) != string::npos;\r
+}\r
+\r
+// A value counts as a list when it contains at least one comma.\r
+static inline bool isList(const string& value)\r
+{\r
+    const auto commaPos = value.find(',');\r
+    return commaPos != string::npos;\r
+}\r
+\r
+// Builds a barcode filter from a property value that is either a single\r
+// barcode or a (possibly bracketed) comma-separated pair of barcodes.\r
+//\r
+// \throws std::runtime_error if the value is empty or a list that does not\r
+//         hold exactly two entries; number parsing/range errors propagate\r
+//         from stoi / boost::numeric_cast\r
+static\r
+PbiFilter CreateBarcodeFilter(string value,\r
+                              const Compare::Type compareType)\r
+{\r
+    if (value.empty())\r
+        throw std::runtime_error("empty value for barcode filter property");\r
+\r
+    // drop one layer of enclosing brackets, e.g. "[0,1]" -> "0,1"\r
+    if (isBracketed(value))\r
+        value = value.substr(1, value.length() - 2);\r
+\r
+    // single value: filter on one barcode\r
+    if (!isList(value))\r
+        return PbiBarcodeFilter{ boost::numeric_cast<int16_t>(stoi(value)), compareType };\r
+\r
+    // list: must be exactly a pair of barcode values\r
+    const vector<string> parts = internal::Split(value, ',');\r
+    if (parts.size() != 2)\r
+        throw std::runtime_error("only 2 barcode values expected");\r
+    const int16_t first = boost::numeric_cast<int16_t>(stoi(parts.at(0)));\r
+    const int16_t second = boost::numeric_cast<int16_t>(stoi(parts.at(1)));\r
+    return PbiBarcodesFilter{ first, second, compareType };\r
+}\r
+\r
+// Builds a forward-barcode filter from a single value or a (possibly\r
+// bracketed) comma-separated list of values.\r
+//\r
+// Note that compareType is only used for the single-value form; the list form\r
+// is constructed from the barcode list alone.\r
+//\r
+// \throws std::runtime_error if the value is empty; number parsing/range\r
+//         errors propagate from stoi / boost::numeric_cast\r
+static\r
+PbiFilter CreateBarcodeForwardFilter(string value,\r
+                                     const Compare::Type compareType)\r
+{\r
+    if (value.empty())\r
+        throw std::runtime_error("empty value for barcode_forward filter property");\r
+\r
+    // drop one layer of enclosing brackets\r
+    if (isBracketed(value))\r
+        value = value.substr(1, value.length() - 2);\r
+\r
+    // single value\r
+    if (!isList(value))\r
+        return PbiBarcodeForwardFilter{ boost::numeric_cast<int16_t>(stoi(value)), compareType };\r
+\r
+    // list of values\r
+    const vector<string> tokens = internal::Split(value, ',');\r
+    vector<int16_t> forwardIds;\r
+    forwardIds.reserve(tokens.size());\r
+    for (size_t i = 0; i < tokens.size(); ++i)\r
+        forwardIds.push_back(boost::numeric_cast<int16_t>(stoi(tokens[i])));\r
+    return PbiBarcodeForwardFilter{ std::move(forwardIds) };\r
+}\r
+\r
+// Builds a reverse-barcode filter from a single value or a (possibly\r
+// bracketed) comma-separated list of values.\r
+//\r
+// Note that compareType is only used for the single-value form; the list form\r
+// is constructed from the barcode list alone.\r
+//\r
+// \throws std::runtime_error if the value is empty; number parsing/range\r
+//         errors propagate from stoi / boost::numeric_cast\r
+static\r
+PbiFilter CreateBarcodeReverseFilter(string value,\r
+                                     const Compare::Type compareType)\r
+{\r
+    if (value.empty())\r
+        throw std::runtime_error("empty value for barcode_reverse filter property");\r
+\r
+    // drop one layer of enclosing brackets\r
+    if (isBracketed(value))\r
+        value = value.substr(1, value.length() - 2);\r
+\r
+    // single value\r
+    if (!isList(value))\r
+        return PbiBarcodeReverseFilter{ boost::numeric_cast<int16_t>(stoi(value)), compareType };\r
+\r
+    // list of values\r
+    const vector<string> tokens = internal::Split(value, ',');\r
+    vector<int16_t> reverseIds;\r
+    reverseIds.reserve(tokens.size());\r
+    for (size_t i = 0; i < tokens.size(); ++i)\r
+        reverseIds.push_back(boost::numeric_cast<int16_t>(stoi(tokens[i])));\r
+    return PbiBarcodeReverseFilter{ std::move(reverseIds) };\r
+}\r
+\r
+// Builds a local-context filter from either a raw integer ("3") or a\r
+// '|'-separated list of flag names ("ADAPTER_BEFORE | ADAPTER_AFTER").\r
+//\r
+// \param[in] value        property value to interpret\r
+// \param[in] compareType  comparison applied by the resulting filter\r
+//\r
+// \throws std::runtime_error if the value is empty\r
+// \throws std::out_of_range  if a flag name is not in contextFlagNames\r
+//         (number parsing errors propagate from stoi)\r
+static\r
+PbiFilter CreateLocalContextFilter(const string& value,\r
+                                   const Compare::Type compareType)\r
+{\r
+    if (value.empty())\r
+        throw std::runtime_error("empty value for local context filter property");\r
+\r
+    LocalContextFlags filterValue = LocalContextFlags::NO_LOCAL_CONTEXT;\r
+\r
+    // if raw integer\r
+    // (isdigit() requires a value representable as unsigned char; passing a\r
+    //  negative plain char is undefined behaviour, hence the cast)\r
+    if (isdigit(static_cast<unsigned char>(value.at(0))))\r
+        filterValue = static_cast<LocalContextFlags>(stoi(value));\r
+\r
+    // else interpret as flag names\r
+    else {\r
+        vector<string> tokens = internal::Split(value, '|');\r
+        for (string& token : tokens) {\r
+            boost::algorithm::trim(token); // trim whitespace\r
+            filterValue = (filterValue | contextFlagNames.at(token));\r
+        }\r
+    }\r
+\r
+    return PbiFilter{ PbiLocalContextFilter{filterValue, compareType} };\r
+}\r
+\r
+// Builds a query-name whitelist filter from a text file holding one query\r
+// name per line.\r
+//\r
+// \param[in] value    filename as given in the property; resolved through\r
+//                     dataset.ResolvePath()\r
+// \param[in] dataset  dataset used to resolve the filename\r
+//\r
+// \throws std::runtime_error if the resolved file cannot be opened (an\r
+//         unreadable file would otherwise silently yield an empty whitelist)\r
+static\r
+PbiFilter CreateQueryNamesFilterFromFile(const string& value,\r
+                                         const DataSet& dataset)\r
+{\r
+    // resolve file from dataset, value\r
+    const string resolvedFilename = dataset.ResolvePath(value);\r
+    ifstream in(resolvedFilename);\r
+    if (!in.is_open())\r
+        throw std::runtime_error("could not open query name file: " + resolvedFilename);\r
+\r
+    // one whitelist entry per line\r
+    vector<string> whitelist;\r
+    string fn;\r
+    while (getline(in, fn))\r
+        whitelist.push_back(fn);\r
+    return PbiQueryNameFilter{ whitelist };\r
+}\r
+\r
+// Builds the PbiFilter described by a single DataSet XML Property element.\r
+//\r
+// The property name is lower-cased and looked up in builtInLookup; the value\r
+// is then parsed according to the matched filter type and combined with the\r
+// comparison derived from the property's operator.\r
+//\r
+// \param[in] property  name / value / operator to translate\r
+// \param[in] dataset   handed to the qname-file filter for path resolution\r
+//\r
+// \returns the corresponding built-in filter\r
+//\r
+// \throws std::runtime_error if the name is unknown, the value cannot be\r
+//         parsed or does not fit the filter's value type, or the filter is\r
+//         not supported here; the message lists the offending property\r
+static\r
+PbiFilter FromDataSetProperty(const Property& property,\r
+                              const DataSet& dataset)\r
+{\r
+    try {\r
+        const string& value = property.Value();\r
+        const Compare::Type compareType = Compare::TypeFromOperator(property.Operator());\r
+        const BuiltIn builtInCode = builtInLookup.at(boost::algorithm::to_lower_copy(property.Name()));\r
+\r
+        // Range-checked conversions: unlike a plain static_cast, these throw\r
+        // instead of silently truncating an out-of-range value (same approach\r
+        // as the barcode filter helpers).\r
+        const auto asUInt32 = [&value]() { return boost::numeric_cast<uint32_t>(stoul(value)); };\r
+        const auto asUInt8 = [&value]() { return boost::numeric_cast<uint8_t>(stoul(value)); };\r
+\r
+        switch (builtInCode) {\r
+\r
+            // single-value filters\r
+            case BuiltIn::AlignedEndFilter     : return PbiAlignedEndFilter{ asUInt32(), compareType };\r
+            case BuiltIn::AlignedLengthFilter  : return PbiAlignedLengthFilter{ asUInt32(), compareType };\r
+            case BuiltIn::AlignedStartFilter   : return PbiAlignedStartFilter{ asUInt32(), compareType };\r
+            case BuiltIn::BarcodeQualityFilter : return PbiBarcodeQualityFilter{ asUInt8(), compareType };\r
+            case BuiltIn::IdentityFilter       : return PbiIdentityFilter{ stof(value), compareType };\r
+            case BuiltIn::MovieNameFilter      : return PbiMovieNameFilter{ value };\r
+            case BuiltIn::QueryEndFilter       : return PbiQueryEndFilter{ stoi(value), compareType };\r
+            case BuiltIn::QueryLengthFilter    : return PbiQueryLengthFilter{ stoi(value), compareType };\r
+            case BuiltIn::QueryNameFilter      : return PbiQueryNameFilter{ value };\r
+            case BuiltIn::QueryStartFilter     : return PbiQueryStartFilter{ stoi(value), compareType };\r
+            case BuiltIn::ReadAccuracyFilter   : return PbiReadAccuracyFilter{ stof(value), compareType };\r
+            case BuiltIn::ReadGroupFilter      : return PbiReadGroupFilter{ value, compareType };\r
+            case BuiltIn::ReferenceEndFilter   : return PbiReferenceEndFilter{ asUInt32(), compareType };\r
+            case BuiltIn::ReferenceIdFilter    : return PbiReferenceIdFilter{ stoi(value), compareType };\r
+            case BuiltIn::ReferenceNameFilter  : return PbiReferenceNameFilter{ value };\r
+            case BuiltIn::ReferenceStartFilter : return PbiReferenceStartFilter{ asUInt32(), compareType };\r
+            case BuiltIn::ZmwFilter            : return PbiZmwFilter{ stoi(value), compareType };\r
+\r
+            // (maybe) list-value filters\r
+            case BuiltIn::BarcodeFilter        : return CreateBarcodeFilter(value, compareType);\r
+            case BuiltIn::BarcodeForwardFilter : return CreateBarcodeForwardFilter(value, compareType);\r
+            case BuiltIn::BarcodeReverseFilter : return CreateBarcodeReverseFilter(value, compareType);\r
+            case BuiltIn::LocalContextFilter   : return CreateLocalContextFilter(value, compareType);\r
+\r
+            // other built-ins\r
+            case BuiltIn::QueryNamesFromFileFilter : return CreateQueryNamesFilterFromFile(value, dataset); // compareType ignored\r
+\r
+            // codes without a construction rule above; give the handler\r
+            // below a meaningful reason to report\r
+            default :\r
+                throw std::runtime_error("filter type is not supported as a DataSet property");\r
+        }\r
+        // unreachable\r
+        return PbiFilter{ };\r
+\r
+    } catch (const std::exception& e) {\r
+        // re-throw every failure as one descriptive error naming the property\r
+        stringstream s;\r
+        s << "error: could not create filter from XML Property element: " << endl\r
+          << " Name: " << property.Name() << endl\r
+          << " Value: " << property.Value() << endl\r
+          << " Operator: " << property.Operator() << endl\r
+          << " reason: " << e.what() << endl;\r
+        throw std::runtime_error(s.str());\r
+    }\r
+}\r
+\r
+} // namespace internal\r
+} // namespace BAM\r
+} // namespace PacBio\r
+\r
+// Translates all filters of a DataSet into one composite PbiFilter.\r
+//\r
+// The result is a UNION over the dataset's Filter elements; each of those is\r
+// a default-constructed PbiFilter holding one sub-filter per Property.\r
+//\r
+// \throws std::runtime_error if any property cannot be converted\r
+PbiFilter PbiFilter::FromDataSet(const DataSet& dataset)\r
+{\r
+    PbiFilter combined{ PbiFilter::UNION };\r
+    for (auto&& filterElement : dataset.Filters()) {\r
+\r
+        // gather this element's properties into a single filter\r
+        PbiFilter perElement{ };\r
+        for (auto&& propertyElement : filterElement.Properties()) {\r
+            perElement.Add(internal::FromDataSetProperty(propertyElement, dataset));\r
+        }\r
+\r
+        combined.Add(perElement);\r
+    }\r
+    return combined;\r
+}\r
+\r
+// Creates an INTERSECT-type composite filter holding copies of 'filters'.\r
+PbiFilter PbiFilter::Intersection(const std::vector<PbiFilter>& filters)\r
+{\r
+    PbiFilter combined{ PbiFilter::INTERSECT };\r
+    combined.Add(filters);\r
+    return combined;\r
+}\r
+\r
+// Creates an INTERSECT-type composite filter, passing 'filters' on as an rvalue.\r
+PbiFilter PbiFilter::Intersection(std::vector<PbiFilter>&& filters)\r
+{\r
+    PbiFilter combined{ PbiFilter::INTERSECT };\r
+    combined.Add(std::move(filters));\r
+    return combined;\r
+}\r
+\r
+// Creates a UNION-type composite filter holding copies of 'filters'.\r
+PbiFilter PbiFilter::Union(const std::vector<PbiFilter>& filters)\r
+{\r
+    PbiFilter combined{ PbiFilter::UNION };\r
+    combined.Add(filters);\r
+    return combined;\r
+}\r
+\r
+// Creates a UNION-type composite filter, passing 'filters' on as an rvalue.\r
+PbiFilter PbiFilter::Union(std::vector<PbiFilter>&& filters)\r
+{\r
+    PbiFilter combined{ PbiFilter::UNION };\r
+    combined.Add(std::move(filters));\r
+    return combined;\r
+}\r
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilterQuery.cpp
+/// \brief Implements the PbiFilterQuery class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiFilterQuery.h"
+#include "pbbam/CompositeBamReader.h"
+
+
+#include <iostream>
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// Private implementation of PbiFilterQuery: owns the composite reader that
+// applies the filter across the dataset.
+struct PbiFilterQuery::PbiFilterQueryPrivate
+{
+ // \param[in] filter   filter forwarded to the reader
+ // \param[in] dataset  dataset forwarded to the reader
+ PbiFilterQueryPrivate(const PbiFilter& filter, const DataSet& dataset)
+ : reader_(filter, dataset)
+ { }
+
+ // Compare::None: no ordering is applied when merging records
+ PbiFilterCompositeBamReader<Compare::None> reader_; // unsorted
+};
+
+// Creates a query that yields the dataset's records accepted by 'filter'.
+//
+// \param[in] filter   PBI filter to apply
+// \param[in] dataset  dataset to read from
+PbiFilterQuery::PbiFilterQuery(const PbiFilter& filter, const DataSet& dataset)
+ : internal::IQuery()
+ , d_(new PbiFilterQueryPrivate(filter, dataset))
+{ }
+
+// Empty body; presumably defined here so d_ is destroyed where
+// PbiFilterQueryPrivate is a complete type - confirm against the header.
+PbiFilterQuery::~PbiFilterQuery(void) { }
+
+// Fetches the next record by delegating to the underlying composite reader.
+//
+// \param[out] r  receives the next record
+// \returns the reader's GetNext() result
+bool PbiFilterQuery::GetNext(BamRecord &r)
+{ return d_->reader_.GetNext(r); }
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiFilterTypes.cpp
+/// \brief Implements the built-in PBI filters.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiFilterTypes.h"
+#include "StringUtils.h"
+#include <boost/algorithm/string.hpp>
+#include <sstream>
+#include <string>
+#include <cassert>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Collects the indices i for which (end[i] - start[i]) compares to 'value' as
+// requested by 'cmp'.
+//
+// \param[in] start  per-row start positions
+// \param[in] end    per-row end positions (same size as 'start')
+// \param[in] value  length to compare against
+// \param[in] cmp    comparison to apply
+//
+// \returns matching row indices, in ascending order
+// \throws std::runtime_error on an unrecognized comparison type
+template<typename T>
+IndexList readLengthHelper(const std::vector<T>& start,
+                           const std::vector<T>& end,
+                           const T& value,
+                           const Compare::Type cmp)
+{
+    assert(start.size() == end.size());
+
+    IndexList hits{ };
+    const auto rowCount = start.size();
+    for (size_t row = 0; row < rowCount; ++row) {
+        const auto length = end[row] - start[row];
+
+        bool accepted = false;
+        switch(cmp) {
+            case Compare::EQUAL              : accepted = (length == value); break;
+            case Compare::NOT_EQUAL          : accepted = (length != value); break;
+            case Compare::LESS_THAN          : accepted = (length <  value); break;
+            case Compare::LESS_THAN_EQUAL    : accepted = (length <= value); break;
+            case Compare::GREATER_THAN       : accepted = (length >  value); break;
+            case Compare::GREATER_THAN_EQUAL : accepted = (length >= value); break;
+            default:
+                // programming error: trips the assert in debug builds, throws otherwise
+                assert(false);
+                throw std::runtime_error(string{"read length filter encountered unknown Compare::Type: "} +
+                                         Compare::TypeToName(cmp));
+        }
+
+        if (!accepted)
+            continue;
+        hits.push_back(row);
+    }
+    return hits;
+}
+
+// Builds a UNION filter matching every read group ID that can be derived from
+// 'movieName', one per candidate read type.
+//
+// The candidate read types are kept in step with the list PbiQueryNameFilter
+// builds for the same purpose, which includes "ZMW".
+//
+// \param[in] movieName   movie name the read group IDs are derived from
+// \param[in] includeCcs  when true, the "CCS" read group ID is matched as well
+static
+PbiFilter filterFromMovieName(const string& movieName, bool includeCcs)
+{
+    // we'll match on any rgIds from our candidate list
+    auto filter = PbiFilter{ PbiFilter::UNION };
+    filter.Add(
+    {
+        PbiReadGroupFilter{ MakeReadGroupId(movieName, "POLYMERASE") },
+        PbiReadGroupFilter{ MakeReadGroupId(movieName, "HQREGION") },
+        PbiReadGroupFilter{ MakeReadGroupId(movieName, "SUBREAD") },
+        PbiReadGroupFilter{ MakeReadGroupId(movieName, "SCRAP") },
+        PbiReadGroupFilter{ MakeReadGroupId(movieName, "UNKNOWN") },
+        PbiReadGroupFilter{ MakeReadGroupId(movieName, "ZMW") }
+    });
+    if (includeCcs)
+        filter.Add(PbiReadGroupFilter{ MakeReadGroupId(movieName, "CCS") });
+
+    return filter;
+}
+
+//static
+//PbiFilter filterFromQueryName(const string& queryName)
+//{
+// // split full name into moviename, holenumber
+// const auto nameParts = internal::Split(queryName, '/');
+// if (nameParts.size() != 3) {
+// auto msg = string{ "PbiQueryNameFilter error: requested QNAME (" } + queryName;
+// msg += string{ ") is not a valid PacBio BAM QNAME. See spec for details"};
+// throw std::runtime_error(msg);
+// }
+//
+// // main filter: {union of candidate rgIds} && zmw [&& qStart && qEnd](non-CCS reads)
+// auto filter = PbiFilter{ };
+// filter.Add(PbiZmwFilter{ stoi(nameParts.at(1)) }); // hole number
+//
+// const auto movieName = nameParts.at(0);
+//
+// // CCS (only 1 possible candidate rgId)
+// if (nameParts.at(2) == "ccs")
+// filter.Add(PbiReadGroupFilter{ MakeReadGroupId(movieName, "CCS") });
+//
+// // all other read types
+// else {
+// // we'll match on any read type that matches our qname
+// // (except for CCS since it has a different QNAME anyway)
+// const auto rgIdFilter = filterFromMovieName(movieName, false);
+// filter.Add(rgIdFilter);
+//
+// // add qStart/qEnd filters to our main filter
+// const auto queryIntervalParts = internal::Split(nameParts.at(2), '_');
+// if (queryIntervalParts.size() != 2) {
+// auto msg = string{ "PbiQueryNameFilter error: requested QNAME (" } + queryName;
+// msg += string{ ") is not a valid PacBio BAM QNAME. See spec for details"};
+// throw std::runtime_error(msg);
+// }
+// filter.Add(PbiQueryStartFilter{ stoi(queryIntervalParts.at(0)) });
+// filter.Add(PbiQueryEndFilter{ stoi(queryIntervalParts.at(1)) });
+// }
+// return filter;
+//}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// PbiAlignedLengthFilter
+
+// Accepts a row when its aligned length (aEnd - aStart) satisfies the filter's
+// comparison.
+//
+// \param[in] idx  raw index data
+// \param[in] row  row to test; std::vector::at() throws if it is out of range
+bool PbiAlignedLengthFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{
+    const auto& mapped = idx.MappedData();
+    const auto alignedEnd = mapped.aEnd_.at(row);
+    const auto alignedStart = mapped.aStart_.at(row);
+    return CompareHelper(alignedEnd - alignedStart);
+}
+
+// PbiIdentityFilter
+
+// Accepts a row when its identity satisfies the filter's comparison, where
+//
+//     identity = 1 - (mismatches + deletions + insertions) / (qEnd - qStart)
+//
+// NOTE(review): a row with qEnd == qStart divides by zero in float arithmetic
+// (yielding inf/NaN rather than an exception) - confirm this cannot occur.
+//
+// \param[in] idx  raw index data
+// \param[in] row  row to test; std::vector::at() throws if it is out of range
+bool PbiIdentityFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{
+    // non-matching bases: mismatches plus deleted and inserted bases
+    const auto& mapped = idx.MappedData();
+    const auto mismatches = mapped.nMM_.at(row);
+    const auto indels = mapped.NumDeletedAndInsertedBasesAt(row); // (deleted, inserted)
+
+    // query interval of the read
+    const auto& basic = idx.BasicData();
+    const auto queryBegin = basic.qStart_.at(row);
+    const auto queryFinish = basic.qEnd_.at(row);
+
+    const auto readLength = queryFinish - queryBegin;
+    const auto nonMatches = mismatches + indels.first + indels.second;
+    const float identity = 1.0 - (static_cast<float>(nonMatches)/static_cast<float>(readLength));
+
+    return CompareHelper(identity);
+}
+
+// PbiMovieNameFilter
+
+// Creates a filter matching reads from a single movie; the composite filter
+// covers the movie's candidate read group IDs, CCS included.
+//
+// \param[in] movieName  movie name to match
+PbiMovieNameFilter::PbiMovieNameFilter(const std::string& movieName)
+ : compositeFilter_(internal::filterFromMovieName(movieName, true)) // include CCS
+{ }
+
+// Creates a filter matching reads from any movie in 'whitelist'. Each movie
+// contributes its own per-movie filter (CCS included) to a UNION.
+//
+// \param[in] whitelist  movie names to match
+PbiMovieNameFilter::PbiMovieNameFilter(const std::vector<std::string>& whitelist)
+    : compositeFilter_(PbiFilter::UNION)
+{
+    const auto last = whitelist.end();
+    for (auto name = whitelist.begin(); name != last; ++name) {
+        // 'true': also match the movie's CCS read group
+        compositeFilter_.Add(internal::filterFromMovieName(*name, true));
+    }
+}
+
+// Rvalue overload: creates a filter matching reads from any movie in
+// 'whitelist'. The names are only read, not moved from.
+//
+// \param[in] whitelist  movie names to match
+PbiMovieNameFilter::PbiMovieNameFilter(std::vector<std::string>&& whitelist)
+    : compositeFilter_(PbiFilter::UNION)
+{
+    const auto last = whitelist.end();
+    for (auto name = whitelist.begin(); name != last; ++name) {
+        // 'true': also match the movie's CCS read group
+        compositeFilter_.Add(internal::filterFromMovieName(*name, true));
+    }
+}
+
+// PbiQueryLengthFilter
+
+// Accepts a row when its query length (qEnd - qStart) satisfies the filter's
+// comparison.
+//
+// \param[in] idx  raw index data
+// \param[in] row  row to test; std::vector::at() throws if it is out of range
+bool PbiQueryLengthFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{
+    const auto& basic = idx.BasicData();
+    const auto queryBegin = basic.qStart_.at(row);
+    const auto queryFinish = basic.qEnd_.at(row);
+    return CompareHelper(queryFinish - queryBegin);
+}
+
+// PbiQueryNameFilter
+
+/// \brief Private implementation of PbiQueryNameFilter.
+///
+/// Holds a two-level lookup built from the whitelisted query names:
+/// read group ID (as int32) -> shared table of ZMW hole number -> set of
+/// (queryStart, queryEnd) pairs. A row is accepted only if its read group ID,
+/// hole number and query interval are all found in that lookup.
+///
+struct PbiQueryNameFilter::PbiQueryNameFilterPrivate
+{
+public:
+ // (queryStart, queryEnd); CCS names are stored as (-1, -1)
+ typedef pair<int32_t, int32_t> QueryInterval;
+ typedef set<QueryInterval> QueryIntervals;
+ // hole number -> intervals requested for that ZMW
+ typedef unordered_map<int32_t, QueryIntervals> ZmwLookup;
+ // shared so that all candidate read group IDs of one movie use one table
+ typedef shared_ptr<ZmwLookup> ZmwLookupPtr;
+ // read group ID (int form) -> shared per-movie ZMW table
+ typedef unordered_map<int32_t, ZmwLookupPtr> RgIdLookup;
+
+public:
+ /// \brief Builds the lookup from a list of query names.
+ ///
+ /// Each name is split on '/' into exactly three parts: movie name, ZMW
+ /// number and either "ccs"/"CCS" or a "<start>_<end>" interval.
+ ///
+ /// \param[in] whitelist  query names to accept
+ /// \throws std::runtime_error if a name does not split into 3 parts, or a
+ ///         non-CCS interval does not split into 2 parts. The numeric
+ ///         conversions use stoi(), which is not wrapped here, so its
+ ///         exceptions propagate unchanged.
+ ///
+ PbiQueryNameFilterPrivate(const vector<string>& whitelist)
+ {
+ for (const auto& queryName : whitelist) {
+
+ // split name into main parts
+ auto nameParts = internal::Split(queryName, '/');
+ if (nameParts.size() != 3) {
+ auto msg = string{ "PbiQueryNameFilter error: requested QNAME (" } + queryName;
+ msg += string{ ") is not a valid PacBio BAM QNAME. See spec for details"};
+ throw std::runtime_error(msg);
+ }
+
+ //
+ // generate candidate read group IDs from movie name
+ //
+ // then, ensure read group IDs in lookup table, creating or fetching
+ // shared ZmwLookup table if new movie
+ //
+ const string& movieName = nameParts.at(0);
+ const bool isCCS = (nameParts.at(2) == "ccs" || nameParts.at(2) == "CCS");
+ vector<int32_t> rgIds;
+ if (isCCS) {
+ // a CCS name maps to exactly one read group ID
+ rgIds.push_back( ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "CCS")) );
+ } else {
+ // the name does not say which non-CCS read type produced it, so
+ // every non-CCS read type is registered as a candidate
+ rgIds.reserve(6);
+ rgIds.push_back( ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "POLYMERASE")));
+ rgIds.push_back( ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "HQREGION")));
+ rgIds.push_back( ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "SUBREAD")));
+ rgIds.push_back( ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "SCRAP")));
+ rgIds.push_back( ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "UNKNOWN")));
+ rgIds.push_back( ReadGroupInfo::IdToInt(MakeReadGroupId(movieName, "ZMW")));
+ }
+ assert(!rgIds.empty());
+ // the first candidate ID decides whether this group was seen before;
+ // the asserts below check that the other candidates agree
+ auto rgFound = lookup_.find(rgIds.front());
+ ZmwLookupPtr zmwPtr = nullptr;
+ if (rgFound == lookup_.end()) {
+ // new group: one ZmwLookup shared by all of its candidate IDs
+ zmwPtr = ZmwLookupPtr(new ZmwLookup);
+ for (const auto& rg : rgIds) {
+ assert(lookup_.find(rg) == lookup_.end());
+ lookup_.emplace(rg, zmwPtr);
+ }
+ }
+ else {
+#ifndef NDEBUG
+ for (const auto& rg : rgIds)
+ assert(lookup_.find(rg) != lookup_.end());
+#endif
+ zmwPtr = rgFound->second;
+ }
+
+ // fetch ZMW & QueryStart/QEnd from query name
+ const int32_t zmw = stoi(nameParts.at(1));
+ // CCS names carry no interval; they keep the (-1, -1) placeholder
+ int32_t queryStart = -1;
+ int32_t queryEnd = -1;
+ if (!isCCS) {
+ const auto queryIntervalParts = internal::Split(nameParts.at(2), '_');
+ if (queryIntervalParts.size() != 2) {
+ auto msg = string{ "PbiQueryNameFilter error: requested QNAME (" } + queryName;
+ msg += string{ ") is not a valid PacBio BAM QNAME. See spec for details"};
+ throw std::runtime_error(msg);
+ }
+ queryStart = stoi(queryIntervalParts.at(0));
+ queryEnd = stoi(queryIntervalParts.at(1));
+ }
+
+ // creating new ZMW entry if not yet seen & store QS/QE pair
+ //
+ const auto zmwFound = zmwPtr->find(zmw);
+ if (zmwFound == zmwPtr->end())
+ zmwPtr->emplace(zmw, QueryIntervals{});
+ QueryIntervals& queryIntervals = zmwPtr->at(zmw);
+ queryIntervals.emplace(make_pair(queryStart, queryEnd));
+ }
+ }
+
+ /// \brief Copies the lookup from another (possibly null) private object.
+ ///
+ /// Only the outer map is copied; its values are shared_ptrs, so the
+ /// ZmwLookup tables themselves are shared with \p other. A null \p other
+ /// yields an empty lookup.
+ ///
+ PbiQueryNameFilterPrivate(const unique_ptr<PbiQueryNameFilterPrivate>& other)
+ {
+ if (other)
+ lookup_ = other->lookup_;
+ }
+
+ /// \brief Checks whether a PBI row matches one of the requested names.
+ ///
+ /// \param[in] idx  raw PBI data to inspect
+ /// \param[in] row  row number within the basic-data columns
+ /// \returns true if the row's read group ID, hole number and
+ ///          (qStart, qEnd) pair are all present in the lookup
+ ///
+ bool Accepts(const PbiRawData& idx, const size_t row) const
+ {
+ const auto& basicData = idx.BasicData();
+
+ // see if row's RGID known
+ const auto& rgId = basicData.rgId_.at(row);
+ const auto rgFound = lookup_.find(rgId);
+ if (rgFound == lookup_.end())
+ return false;
+
+ // see if row's ZMW known
+ const auto& zmwPtr = rgFound->second;
+ const auto zmw = basicData.holeNumber_.at(row);
+ const auto zmwFound = zmwPtr->find(zmw);
+ if (zmwFound == zmwPtr->end())
+ return false;
+
+ // see if row's QueryStart/QueryEnd known
+ // CCS names already covered in lookup construction phase
+ // NOTE(review): this relies on CCS rows carrying qStart/qEnd equal to the
+ // (-1, -1) placeholder stored by the constructor - confirm against the PBI spec
+ const auto& queryIntervals = zmwFound->second;
+ const auto qStart = basicData.qStart_.at(row);
+ const auto qEnd = basicData.qEnd_.at(row);
+ const auto queryInterval = make_pair(qStart, qEnd);
+ return queryIntervals.find(queryInterval) != queryIntervals.end();
+ }
+
+private:
+ // read group ID -> shared (hole number -> query intervals) table
+ RgIdLookup lookup_;
+};
+
+/// \brief Builds a filter for a single query name.
+///
+/// The name is wrapped in a one-element vector and handed to the private
+/// implementation's whitelist constructor.
+///
+/// \param[in] qname  query name to accept
+///
+PbiQueryNameFilter::PbiQueryNameFilter(const std::string& qname)
+    // parentheses select the (count, value) vector constructor explicitly
+    : d_(new PbiQueryNameFilter::PbiQueryNameFilterPrivate(vector<string>(1, qname)))
+{ }
+// : compositeFilter_(internal::filterFromQueryName(qname))
+//{ }
+
+/// \brief Builds a filter from a whitelist of query names.
+///
+/// The whitelist is passed unchanged to the private implementation, whose
+/// constructor parses the names (and throws on malformed ones).
+///
+/// \param[in] whitelist  query names to accept
+///
+PbiQueryNameFilter::PbiQueryNameFilter(const std::vector<std::string>& whitelist)
+ : d_(new PbiQueryNameFilter::PbiQueryNameFilterPrivate(whitelist))
+{ }
+// : compositeFilter_(PbiFilter::UNION)
+//{
+// try {
+// for (const auto& qname : whitelist)
+// compositeFilter_.Add(internal::filterFromQueryName(qname));
+// }
+// // simply re-throw our own exception
+// catch (std::runtime_error&) {
+// throw;
+// }
+// // we may hit other exceptions (e.g. in stoi()) - but we'll pin on a bit of extra data
+// catch (std::exception& e) {
+// auto msg = string{ "PbiQueryNameFilter encountered error: " } + e.what();
+// throw std::runtime_error(msg);
+// }
+//}
+
+//PbiQueryNameFilter::PbiQueryNameFilter(std::vector<std::string>&& whitelist)
+// : d_(new PbiQueryNameFilter::PbiQueryNameFilterPrivate(whitelist))
+//{ }
+// : compositeFilter_(PbiFilter::UNION)
+//{
+// try {
+// for (const auto& qname : whitelist)
+// compositeFilter_.Add(internal::filterFromQueryName(qname));
+// }
+// // simply re-throw our own exception
+// catch (std::runtime_error&) {
+// throw;
+// }
+// // we may hit other exceptions (e.g. in stoi()) - but we'll pin on a bit of extra data
+// catch (std::exception& e) {
+// auto msg = string{ "PbiQueryNameFilter encountered error: " } + e.what();
+// throw std::runtime_error(msg);
+// }
+//}
+
+/// \brief Copy constructor.
+///
+/// Creates a new private object from other's private pointer; that
+/// constructor copies the lookup map when the pointer is non-null.
+///
+PbiQueryNameFilter::PbiQueryNameFilter(const PbiQueryNameFilter& other)
+ : d_(new PbiQueryNameFilter::PbiQueryNameFilterPrivate(other.d_))
+{ }
+
+/// \brief Destructor; the body is intentionally empty.
+PbiQueryNameFilter::~PbiQueryNameFilter(void) { }
+
+/// \brief Forwards the row check to the private implementation.
+///
+/// \param[in] idx  raw PBI data to inspect
+/// \param[in] row  row number to test
+/// \returns whatever the private implementation's Accepts() returns
+///
+bool PbiQueryNameFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{ return d_->Accepts(idx, row); }
+//{ return compositeFilter_.Accepts(idx, row); }
+
+// PbiReferenceNameFilter
+
+/// \brief Builds a (lazily initialized) filter on a single reference name.
+///
+/// \param[in] rname  reference name to compare against
+/// \param[in] cmp    comparison type; only Compare::EQUAL and
+///                   Compare::NOT_EQUAL are accepted
+/// \throws std::runtime_error for any other comparison type
+///
+PbiReferenceNameFilter::PbiReferenceNameFilter(const std::string& rname,
+                                               const Compare::Type cmp)
+    : initialized_(false)
+    , rname_(rname)
+    , cmp_(cmp)
+{
+    const bool supported = (cmp == Compare::EQUAL || cmp == Compare::NOT_EQUAL);
+    if (supported)
+        return;
+
+    // unsupported comparison: report which one was requested
+    std::string msg("Compare type: ");
+    msg += Compare::TypeToName(cmp);
+    msg += " not supported for PbiReferenceNameFilter (use one of Compare::EQUAL or Compare::NOT_EQUAL).";
+    throw std::runtime_error(msg);
+}
+
+/// \brief Builds a (lazily initialized) filter from a whitelist of reference names.
+///
+/// The whitelist is copied and the comparison is fixed to Compare::EQUAL.
+/// Nothing else happens here; Accepts() calls Initialize() on first use.
+///
+/// \param[in] whitelist  reference names to accept
+///
+PbiReferenceNameFilter::PbiReferenceNameFilter(const std::vector<std::string>& whitelist)
+ : initialized_(false)
+ , rnameWhitelist_(whitelist)
+ , cmp_(Compare::EQUAL)
+{ }
+
+/// \brief Builds a (lazily initialized) filter from a whitelist of reference names.
+///
+/// Rvalue overload: the whitelist is moved into the filter and the comparison
+/// is fixed to Compare::EQUAL. Accepts() calls Initialize() on first use.
+///
+/// \param[in] whitelist  reference names to accept
+///
+PbiReferenceNameFilter::PbiReferenceNameFilter(std::vector<std::string>&& whitelist)
+ : initialized_(false)
+ , rnameWhitelist_(std::move(whitelist))
+ , cmp_(Compare::EQUAL)
+{ }
+
+/// \brief Tests a row against the reference-name filter.
+///
+/// The sub-filter is built on the first call, using the index passed to that
+/// call, and is reused afterwards.
+///
+/// \param[in] idx  raw PBI data to inspect
+/// \param[in] row  row number to test
+/// \returns the sub-filter's verdict for the row
+///
+bool PbiReferenceNameFilter::Accepts(const PbiRawData& idx, const size_t row) const
+{
+    if (!initialized_) {
+        Initialize(idx);
+    }
+    return subFilter_.Accepts(idx, row);
+}
+
+/// \brief Builds the reference-ID sub-filter from the stored reference name(s).
+///
+/// The BAM filename is derived by dropping the last four characters of the
+/// PBI filename; that BAM file is then used to translate reference names into
+/// reference IDs.
+///
+/// \param[in] idx  raw PBI data whose Filename() locates the BAM file
+///
+void PbiReferenceNameFilter::Initialize(const PbiRawData& idx) const
+{
+    // NOTE(review): the last 4 characters are dropped without checking what
+    // they are - presumably a ".pbi" suffix; confirm against callers
+    const auto indexPath = idx.Filename();
+    const auto bamPath   = indexPath.substr(0, indexPath.length() - 4);
+    const auto bam       = BamFile{ bamPath };
+
+    if (rnameWhitelist_ == boost::none) {
+        // single-value: honor the requested comparison type
+        const auto refId = bam.ReferenceId(rname_);
+        subFilter_ = PbiReferenceIdFilter{ refId, cmp_ };
+        initialized_ = true;
+        return;
+    }
+
+    // multi-value whitelist: union of one reference-ID filter per name
+    subFilter_ = PbiFilter(PbiFilter::UNION);
+    for (const auto& name : rnameWhitelist_.get()) {
+        subFilter_.Add(PbiReferenceIdFilter{ bam.ReferenceId(name) });
+    }
+    initialized_ = true;
+}
+
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiIndex.cpp
+/// \brief Implements the PbiIndex class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiIndex.h"
+#include "PbiIndexIO.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// ----------------------------------
+// BasicLookupData implementation
+// ----------------------------------
+
+/// \brief Default constructor; all members are default-initialized.
+BasicLookupData::BasicLookupData(void) { }
+
+/// \brief Builds the basic lookup structures from raw PBI basic data.
+///
+/// Each lookup member is constructed directly from the raw column of the
+/// same name.
+///
+/// \param[in] rawData  raw basic-data columns
+///
+BasicLookupData::BasicLookupData(const PbiRawBasicData& rawData)
+ : rgId_(rawData.rgId_)
+ , qStart_(rawData.qStart_)
+ , qEnd_(rawData.qEnd_)
+ , holeNumber_(rawData.holeNumber_)
+ , readQual_(rawData.readQual_)
+ , ctxtFlag_(rawData.ctxtFlag_)
+ , fileOffset_(rawData.fileOffset_)
+{ }
+
+// ----------------------------------
+// MappedLookupData implementation
+// ----------------------------------
+
+/// \brief Default constructor; all members are default-initialized.
+MappedLookupData::MappedLookupData(void) { }
+
+/// \brief Builds the mapped-data lookup structures from raw PBI mapped data.
+///
+/// Most members are constructed directly from the raw column of the same
+/// name. Three things are derived row by row: the forward/reverse strand row
+/// lists, and the lookups from number of inserted / deleted bases to the rows
+/// having that count.
+///
+/// \param[in] rawData  raw mapped-data columns
+///
+MappedLookupData::MappedLookupData(const PbiRawMappedData& rawData)
+    : tId_(rawData.tId_)
+    , tStart_(rawData.tStart_)
+    , tEnd_(rawData.tEnd_)
+    , aStart_(rawData.aStart_)
+    , aEnd_(rawData.aEnd_)
+    , nM_(rawData.nM_)
+    , nMM_(rawData.nMM_)
+    , mapQV_(rawData.mapQV_)
+{
+    const size_t rowCount = rawData.revStrand_.size();
+
+    // initial capacity guess: half of the rows on each strand
+    forwardStrand_.reserve(rowCount/2);
+    reverseStrand_.reserve(rowCount/2);
+
+    // base count -> rows with that count
+    std::map<uint32_t, IndexList> rowsByInsCount;
+    std::map<uint32_t, IndexList> rowsByDelCount;
+
+    for (size_t row = 0; row < rowCount; ++row) {
+
+        // pair is (nDel, nIns)
+        const auto delAndIns = rawData.NumDeletedAndInsertedBasesAt(row);
+        rowsByDelCount[delAndIns.first].push_back(row);
+        rowsByInsCount[delAndIns.second].push_back(row);
+
+        // any non-zero revStrand value counts as reverse strand
+        if (rawData.revStrand_.at(row) != 0)
+            reverseStrand_.push_back(row);
+        else
+            forwardStrand_.push_back(row);
+    }
+
+    nIns_ = OrderedLookup<uint32_t>(std::move(rowsByInsCount));
+    nDel_ = OrderedLookup<uint32_t>(std::move(rowsByDelCount));
+}
+
+// ----------------------------------
+// BarcodeLookupData implementation
+// ----------------------------------
+
+/// \brief Default constructor; all members are default-initialized.
+BarcodeLookupData::BarcodeLookupData(void) { }
+
+/// \brief Builds the barcode lookup structures from raw PBI barcode data.
+///
+/// Each lookup member is constructed directly from the raw column of the
+/// same name.
+///
+/// \param[in] rawData  raw barcode-data columns
+///
+BarcodeLookupData::BarcodeLookupData(const PbiRawBarcodeData& rawData)
+ : bcForward_(rawData.bcForward_)
+ , bcReverse_(rawData.bcReverse_)
+ , bcQual_(rawData.bcQual_)
+
+{ }
+
+// ----------------------------------
+// ReferenceLookupData implementation
+// ----------------------------------
+
+/// \brief Default constructor; all members are default-initialized.
+ReferenceLookupData::ReferenceLookupData(void) { }
+
+/// \brief Builds the reference lookup from raw PBI reference data.
+///
+/// Every raw entry becomes a mapping from its tId to the row range
+/// (beginRow, endRow). If a tId occurs more than once, the last entry wins.
+///
+/// \param[in] rawData  raw reference entries
+///
+ReferenceLookupData::ReferenceLookupData(const PbiRawReferenceData& rawData)
+{
+    references_.reserve(rawData.entries_.size());
+    for (const PbiReferenceEntry& entry : rawData.entries_) {
+        references_[entry.tId_] = IndexRange(entry.beginRow_, entry.endRow_);
+    }
+}
+
+// --------------------------------
+// PbiIndexPrivate implementation
+// --------------------------------
+
+/// \brief Creates an empty index: current PBI version, BASIC section flag
+///        only, and zero reads.
+PbiIndexPrivate::PbiIndexPrivate(void)
+ : version_(PbiFile::CurrentVersion)
+ , sections_(PbiFile::BASIC)
+ , numReads_(0)
+{ }
+
+/// \brief Builds the lookup-oriented index from raw PBI data.
+///
+/// Header fields are copied from the raw index; each lookup-data member is
+/// constructed from the corresponding raw section.
+///
+/// \param[in] rawIndex  raw PBI data to convert
+///
+PbiIndexPrivate::PbiIndexPrivate(const PbiRawData& rawIndex)
+ : filename_(rawIndex.Filename())
+ , version_(rawIndex.Version())
+ , sections_(rawIndex.FileSections())
+ , numReads_(rawIndex.NumReads())
+ , basicData_(rawIndex.BasicData())
+ , mappedData_(rawIndex.MappedData())
+ , referenceData_(rawIndex.ReferenceData())
+ , barcodeData_(rawIndex.BarcodeData())
+{ }
+
+/// \brief Builds the lookup-oriented index from raw PBI data (rvalue overload).
+///
+/// \param[in] rawIndex  raw PBI data to convert
+///
+// NOTE(review): std::move is applied to the results of accessor calls.
+// Whether any section is actually moved rather than copied depends on the
+// accessors' return types and on the lookup-data constructors available,
+// which cannot be confirmed from this file alone.
+PbiIndexPrivate::PbiIndexPrivate(PbiRawData&& rawIndex)
+ : filename_(rawIndex.Filename())
+ , version_(std::move(rawIndex.Version()))
+ , sections_(std::move(rawIndex.FileSections()))
+ , numReads_(std::move(rawIndex.NumReads()))
+ , basicData_(std::move(rawIndex.BasicData()))
+ , mappedData_(std::move(rawIndex.MappedData()))
+ , referenceData_(std::move(rawIndex.ReferenceData()))
+ , barcodeData_(std::move(rawIndex.BarcodeData()))
+{ }
+
+/// \brief Creates an independent copy of this private index object.
+///
+/// A default-constructed object is allocated and every member is then
+/// copy-assigned from this one.
+///
+/// \returns owning pointer to the new copy
+///
+unique_ptr<PbiIndexPrivate> PbiIndexPrivate::DeepCopy(void) const
+{
+    std::unique_ptr<PbiIndexPrivate> clone(new PbiIndexPrivate);
+    PbiIndexPrivate& target = *clone;
+
+    // header fields
+    target.filename_ = filename_;
+    target.version_  = version_;
+    target.sections_ = sections_;
+    target.numReads_ = numReads_;
+
+    // per-section lookup data
+    target.basicData_     = basicData_;
+    target.mappedData_    = mappedData_;
+    target.referenceData_ = referenceData_;
+    target.barcodeData_   = barcodeData_;
+
+    return clone;
+}
+
+// -------------------------
+// PbiIndex implementation
+// -------------------------
+
+/// \brief Creates an empty index backed by a default-constructed private object.
+PbiIndex::PbiIndex(void)
+ : d_(new PbiIndexPrivate)
+{ }
+
+/// \brief Loads an index from a PBI file.
+///
+/// A temporary PbiRawData is constructed from the filename and handed to the
+/// private object, which converts it into lookup structures. Any exception
+/// raised while constructing the raw data propagates to the caller.
+///
+/// \param[in] pbiFilename  path of the PBI file
+///
+PbiIndex::PbiIndex(const string& pbiFilename)
+ : d_(new PbiIndexPrivate(PbiRawData(pbiFilename)))
+{ }
+
+/// \brief Copy constructor; makes a deep copy of other's private data.
+///
+/// DeepCopy() returns a freshly allocated object by value, so it initializes
+/// d_ directly without any explicit cast. If \p other has been moved from
+/// (its private pointer is null), the new index is left with a null private
+/// pointer as well instead of dereferencing null.
+///
+/// \param[in] other  index to copy
+///
+PbiIndex::PbiIndex(const PbiIndex& other)
+    : d_(other.d_ ? other.d_->DeepCopy()
+                  : std::unique_ptr<PbiIndexPrivate>())
+{ }
+
+/// \brief Move constructor; takes over other's private data.
+///
+/// \param[in] other  index to move from; its private pointer is moved out
+///
+PbiIndex::PbiIndex(PbiIndex&& other)
+ : d_(std::move(other.d_))
+{ }
+
+/// \brief Copy assignment; replaces this index's data with a deep copy of other's.
+///
+/// Self-assignment is a no-op, which avoids a pointless deep copy. If
+/// \p other has been moved from (its private pointer is null), this index
+/// ends up with a null private pointer too instead of dereferencing null.
+///
+/// \param[in] other  index to copy
+/// \returns reference to this index
+///
+PbiIndex& PbiIndex::operator=(const PbiIndex& other)
+{
+    if (this != &other) {
+        // the deep copy is a new object, so it can be moved straight into d_
+        d_ = other.d_ ? other.d_->DeepCopy()
+                      : std::unique_ptr<PbiIndexPrivate>();
+    }
+    return *this;
+}
+
+/// \brief Move assignment; takes over other's private data.
+///
+/// \param[in] other  index to move from; its private pointer is moved out
+/// \returns reference to this index
+///
+PbiIndex& PbiIndex::operator=(PbiIndex&& other)
+{
+ d_ = std::move(other.d_);
+ return *this;
+}
+
+/// \brief Destructor; the body is intentionally empty.
+PbiIndex::~PbiIndex(void) { }
+
+/// \returns a copy of the filename stored in the private data
+string PbiIndex::Filename(void) const
+{ return d_->filename_; }
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// Author: Derek Barnett
+
+#include "PbiIndexIO.h"
+
+#include "pbbam/BamFile.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/EntireFileQuery.h"
+#include "pbbam/PbiBuilder.h"
+#include "MemoryUtils.h"
+#include <boost/algorithm/string.hpp>
+#include <vector>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief Appends content of src vector to dst vector using move semantics.
+///
+/// If dst is empty it simply takes over src's contents; otherwise the
+/// elements of src are moved one by one onto the end of dst. Either way src
+/// is explicitly cleared afterwards, because a moved-from vector is only
+/// guaranteed to be in a valid but unspecified state.
+///
+/// src and dst must be distinct objects.
+///
+/// \note Declared noexcept although growing dst may allocate; an allocation
+///       failure therefore terminates the program instead of propagating.
+///
+/// \param[in,out] src  Input vector that will be empty after execution
+/// \param[in,out] dst  Output vector that will be appended to
+///
+template <typename T>
+inline void MoveAppend(std::vector<T>& src, std::vector<T>& dst) noexcept
+{
+    if (dst.empty())
+    {
+        dst = std::move(src);
+    }
+    else
+    {
+        dst.reserve(dst.size() + src.size());
+        std::move(src.begin(), src.end(), std::back_inserter(dst));
+    }
+    src.clear();
+}
+
+/// \brief Appends content of src vector to dst vector using move semantics.
+///
+/// Overload for temporaries and explicitly moved vectors. Inside the
+/// function the parameter is an lvalue, so the call below resolves to the
+/// lvalue overload, which holds the single implementation.
+///
+/// \param[in]     src  Input vector (rvalue); left empty
+/// \param[in,out] dst  Output vector that will be appended to
+///
+template <typename T>
+inline void MoveAppend(std::vector<T>&& src, std::vector<T>& dst) noexcept
+{
+    MoveAppend(src, dst);
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// ---------------------------
+// PbiIndexIO implementation
+// ---------------------------
+
+/// \brief Loads raw PBI data from a file and returns it by value.
+///
+/// Convenience wrapper around the two-argument Load(); any exception thrown
+/// there propagates to the caller.
+///
+/// \param[in] pbiFilename  path of the PBI file
+/// \returns the loaded raw data
+///
+PbiRawData PbiIndexIO::Load(const std::string& pbiFilename)
+{
+ PbiRawData rawData;
+ Load(rawData, pbiFilename);
+ return rawData;
+}
+
+/// \brief Loads raw PBI data from a file into an existing object.
+///
+/// The header is always read. The data sections are read only when the
+/// header reports at least one read: basic data first, then mapped,
+/// reference and barcode data, each only if rawData reports having it.
+///
+/// \param[out] rawData   object that receives the loaded data
+/// \param[in]  filename  path of the PBI file; must end in ".pbi"
+///                       (case-insensitive)
+/// \throws std::runtime_error if the extension is wrong or the file cannot
+///         be opened
+///
+void PbiIndexIO::Load(PbiRawData& rawData,
+                      const string& filename)
+{
+    // reject anything that is not named like a PBI file
+    if (!boost::algorithm::iends_with(filename, ".pbi"))
+        throw std::runtime_error("unsupported file extension");
+
+    // the handle is released by HtslibBgzfDeleter when it goes out of scope
+    std::unique_ptr<BGZF, HtslibBgzfDeleter> handle(bgzf_open(filename.c_str(), "rb"));
+    BGZF* const in = handle.get();
+    if (!in)
+        throw std::runtime_error("could not open PBI file for reading");
+
+    LoadHeader(rawData, in);
+
+    // nothing follows the header in an empty index
+    const uint32_t readCount = rawData.NumReads();
+    if (readCount == 0)
+        return;
+
+    // sections are read in the order shown here
+    LoadBasicData(rawData.BasicData(), readCount, in);
+    if (rawData.HasMappedData())
+        LoadMappedData(rawData.MappedData(), readCount, in);
+    if (rawData.HasReferenceData())
+        LoadReferenceData(rawData.ReferenceData(), in);
+    if (rawData.HasBarcodeData())
+        LoadBarcodeData(rawData.BarcodeData(), readCount, in);
+}
+
+/// \brief Concatenates the PBI data of every BAM file in a dataset.
+///
+/// The aggregate is reset to zero reads, the current PBI version and the
+/// BASIC | MAPPED | BARCODE sections. Then, file by file, each column of the
+/// file's PBI is appended to the matching aggregate column. Files lacking
+/// barcode or mapped data contribute placeholder values, so all aggregate
+/// columns grow by the same number of rows. Each row also records the
+/// zero-based position of its source file in dataset.BamFiles().
+///
+/// \param[out] aggregateData  receives the concatenated data
+/// \param[in]  dataset        dataset whose BAM files are aggregated
+///
+void PbiIndexIO::LoadFromDataSet(PbiRawData& aggregateData,
+                                 const DataSet& dataset)
+{
+    aggregateData.NumReads(0);
+    aggregateData.FileSections(PbiFile::BASIC | PbiFile::MAPPED | PbiFile::BARCODE);
+    aggregateData.Version(PbiFile::CurrentVersion);
+
+    uint16_t fileIndex = 0;
+    const auto inputs = dataset.BamFiles();
+    for (const auto& input : inputs) {
+        PbiRawData pbi{input.PacBioIndexFilename()};
+        const size_t rowCount = pbi.NumReads();
+
+        // running total of reads
+        aggregateData.NumReads(aggregateData.NumReads()+rowCount);
+
+        // --- basic data: always present ---
+        PbiRawBasicData& dstBasic = aggregateData.BasicData();
+        PbiRawBasicData& srcBasic = pbi.BasicData();
+        MoveAppend(std::move(srcBasic.rgId_),       dstBasic.rgId_);
+        MoveAppend(std::move(srcBasic.qStart_),     dstBasic.qStart_);
+        MoveAppend(std::move(srcBasic.qEnd_),       dstBasic.qEnd_);
+        MoveAppend(std::move(srcBasic.holeNumber_), dstBasic.holeNumber_);
+        MoveAppend(std::move(srcBasic.readQual_),   dstBasic.readQual_);
+        MoveAppend(std::move(srcBasic.ctxtFlag_),   dstBasic.ctxtFlag_);
+        MoveAppend(std::move(srcBasic.fileOffset_), dstBasic.fileOffset_);
+        // every row remembers which input file it came from
+        MoveAppend(std::vector<uint16_t>(rowCount, fileIndex), dstBasic.fileNumber_);
+
+        // --- barcode data: real values, or -1 placeholders ---
+        PbiRawBarcodeData& dstBarcode = aggregateData.BarcodeData();
+        if (!pbi.HasBarcodeData()) {
+            MoveAppend(std::vector<int16_t>(rowCount, -1), dstBarcode.bcForward_);
+            MoveAppend(std::vector<int16_t>(rowCount, -1), dstBarcode.bcReverse_);
+            MoveAppend(std::vector<int8_t>(rowCount, -1),  dstBarcode.bcQual_);
+        } else {
+            PbiRawBarcodeData& srcBarcode = pbi.BarcodeData();
+            MoveAppend(std::move(srcBarcode.bcForward_), dstBarcode.bcForward_);
+            MoveAppend(std::move(srcBarcode.bcReverse_), dstBarcode.bcReverse_);
+            MoveAppend(std::move(srcBarcode.bcQual_),    dstBarcode.bcQual_);
+        }
+
+        // --- mapped data: real values, or "unmapped" placeholders ---
+        PbiRawMappedData& dstMapped = aggregateData.MappedData();
+        if (!pbi.HasMappedData()) {
+            MoveAppend(std::vector<int32_t>(rowCount, -1),                dstMapped.tId_);
+            MoveAppend(std::vector<uint32_t>(rowCount, UnmappedPosition), dstMapped.tStart_);
+            MoveAppend(std::vector<uint32_t>(rowCount, UnmappedPosition), dstMapped.tEnd_);
+            MoveAppend(std::vector<uint32_t>(rowCount, UnmappedPosition), dstMapped.aStart_);
+            MoveAppend(std::vector<uint32_t>(rowCount, UnmappedPosition), dstMapped.aEnd_);
+            MoveAppend(std::vector<uint8_t>(rowCount, 0),                 dstMapped.revStrand_);
+            MoveAppend(std::vector<uint32_t>(rowCount, 0),                dstMapped.nM_);
+            MoveAppend(std::vector<uint32_t>(rowCount, 0),                dstMapped.nMM_);
+            MoveAppend(std::vector<uint8_t>(rowCount, 255),               dstMapped.mapQV_);
+        } else {
+            PbiRawMappedData& srcMapped = pbi.MappedData();
+            MoveAppend(std::move(srcMapped.tId_),       dstMapped.tId_);
+            MoveAppend(std::move(srcMapped.tStart_),    dstMapped.tStart_);
+            MoveAppend(std::move(srcMapped.tEnd_),      dstMapped.tEnd_);
+            MoveAppend(std::move(srcMapped.aStart_),    dstMapped.aStart_);
+            MoveAppend(std::move(srcMapped.aEnd_),      dstMapped.aEnd_);
+            MoveAppend(std::move(srcMapped.revStrand_), dstMapped.revStrand_);
+            MoveAppend(std::move(srcMapped.nM_),        dstMapped.nM_);
+            MoveAppend(std::move(srcMapped.nMM_),       dstMapped.nMM_);
+            MoveAppend(std::move(srcMapped.mapQV_),     dstMapped.mapQV_);
+        }
+
+        ++fileIndex;
+    }
+}
+
+/// \brief Reads the barcode section from an open BGZF stream.
+///
+/// The three columns are read with LoadBgzfVector() in the order bcForward_,
+/// bcReverse_, bcQual_. The read order is part of the file layout and must
+/// not be changed.
+///
+/// \param[out] barcodeData  receives the loaded columns
+/// \param[in]  numReads     number of elements per column; asserted > 0
+/// \param[in]  fp           open BGZF stream
+///
+void PbiIndexIO::LoadBarcodeData(PbiRawBarcodeData& barcodeData,
+ const uint32_t numReads,
+ BGZF* fp)
+{
+ assert(numReads > 0);
+ (void)numReads; // quash warnings building in release mode
+
+ LoadBgzfVector(fp, barcodeData.bcForward_, numReads);
+ LoadBgzfVector(fp, barcodeData.bcReverse_, numReads);
+ LoadBgzfVector(fp, barcodeData.bcQual_, numReads);
+
+ // debug-only sanity check: every column must hold numReads elements
+ assert(barcodeData.bcForward_.size() == numReads);
+ assert(barcodeData.bcReverse_.size() == numReads);
+ assert(barcodeData.bcQual_.size() == numReads);
+}
+
+/// \brief Reads and validates the PBI header from an open BGZF stream.
+///
+/// Layout read: 4-byte magic "PBI\1", 4-byte version, 2-byte section flags,
+/// 4-byte read count, then 18 reserved bytes. Multi-byte fields are
+/// byte-swapped when fp->is_be is set.
+///
+/// \param[out] index  receives version, section flags and read count
+/// \param[in]  fp     open BGZF stream positioned at the start of the file
+/// \throws std::runtime_error if the magic string is missing or wrong, or if
+///         the stream ends before version, flags and read count are read
+///
+void PbiIndexIO::LoadHeader(PbiRawData& index,
+                            BGZF* fp)
+{
+    // reads exactly n bytes; false on error or short read
+    const auto readExact = [fp](void* dst, const size_t n) -> bool {
+        const auto got = bgzf_read(fp, dst, n);
+        return got >= 0 && static_cast<size_t>(got) == n;
+    };
+
+    // 'magic' string
+    char magic[4];
+    if (!readExact(magic, 4) || strncmp(magic, "PBI\1", 4))
+        throw std::runtime_error("expected PBI file, found unknown format instead");
+
+    // version, pbi_flags, & n_reads
+    // (checked, so that a truncated header can never leave these uninitialized)
+    uint32_t version = 0;
+    uint16_t sections = 0;
+    uint32_t numReads = 0;
+    if (!readExact(&version, sizeof(version)) ||
+        !readExact(&sections, sizeof(sections)) ||
+        !readExact(&numReads, sizeof(numReads)))
+    {
+        throw std::runtime_error("could not read PBI header: unexpected end of file");
+    }
+    if (fp->is_be) {
+        version = ed_swap_4(version);
+        sections = ed_swap_2(sections);
+        numReads = ed_swap_4(numReads);
+    }
+
+    index.Version(PbiFile::VersionEnum(version));
+    index.FileSections(PbiFile::Sections(sections));
+    index.NumReads(numReads);
+
+    // skip reserved section; its content is not interpreted and, as before,
+    // the outcome of this read is deliberately ignored
+    char reserved[18];
+    (void)bgzf_read(fp, reserved, sizeof(reserved));
+}
+
+/// \brief Reads the mapped-data section from an open BGZF stream.
+///
+/// The nine columns are read with LoadBgzfVector() in the order tId_,
+/// tStart_, tEnd_, aStart_, aEnd_, revStrand_, nM_, nMM_, mapQV_. The read
+/// order is part of the file layout and must not be changed.
+///
+/// \param[out] mappedData  receives the loaded columns
+/// \param[in]  numReads    number of elements per column; asserted > 0
+/// \param[in]  fp          open BGZF stream
+///
+void PbiIndexIO::LoadMappedData(PbiRawMappedData& mappedData,
+ const uint32_t numReads,
+ BGZF* fp)
+{
+ assert(numReads > 0);
+ (void)numReads; // quash warnings building in release mode
+
+ LoadBgzfVector(fp, mappedData.tId_, numReads);
+ LoadBgzfVector(fp, mappedData.tStart_, numReads);
+ LoadBgzfVector(fp, mappedData.tEnd_, numReads);
+ LoadBgzfVector(fp, mappedData.aStart_, numReads);
+ LoadBgzfVector(fp, mappedData.aEnd_, numReads);
+ LoadBgzfVector(fp, mappedData.revStrand_, numReads);
+ LoadBgzfVector(fp, mappedData.nM_, numReads);
+ LoadBgzfVector(fp, mappedData.nMM_, numReads);
+ LoadBgzfVector(fp, mappedData.mapQV_, numReads);
+
+ // debug-only sanity check: every column must hold numReads elements
+ assert(mappedData.tId_.size() == numReads);
+ assert(mappedData.tStart_.size() == numReads);
+ assert(mappedData.tEnd_.size() == numReads);
+ assert(mappedData.aStart_.size() == numReads);
+ assert(mappedData.aEnd_.size() == numReads);
+ assert(mappedData.revStrand_.size() == numReads);
+ assert(mappedData.nM_.size() == numReads);
+ assert(mappedData.nMM_.size() == numReads);
+ assert(mappedData.mapQV_.size() == numReads);
+}
+
+/// \brief Reads the reference section from an open BGZF stream.
+///
+/// Layout read: a 4-byte entry count followed by that many entries of three
+/// 4-byte fields each (tId, beginRow, endRow). All fields are byte-swapped
+/// when fp->is_be is set. Existing entries in referenceData are discarded.
+///
+/// \param[out] referenceData  receives the loaded entries
+/// \param[in]  fp             open BGZF stream
+/// \throws std::runtime_error if the stream ends before the count or any
+///         entry field is read (previously an unread count was used
+///         uninitialized to size the entry vector)
+///
+void PbiIndexIO::LoadReferenceData(PbiRawReferenceData& referenceData,
+                                   BGZF* fp)
+{
+    // the fields are read as raw 4-byte values, so the sizes must match
+    static_assert(sizeof(PbiReferenceEntry::ID) == 4, "PbiReferenceEntry::ID must be 4 bytes");
+    static_assert(sizeof(PbiReferenceEntry::Row) == 4, "PbiReferenceEntry::Row must be 4 bytes");
+
+    // reads exactly 4 bytes; throws on error or short read
+    const auto read4 = [fp](void* dst) {
+        if (bgzf_read(fp, dst, 4) != 4)
+            throw std::runtime_error("could not read PBI reference data: unexpected end of file");
+    };
+
+    // num refs
+    uint32_t numRefs = 0;
+    read4(&numRefs);
+    if (fp->is_be)
+        numRefs = ed_swap_4(numRefs);
+
+    // reference entries
+    referenceData.entries_.clear();
+    referenceData.entries_.resize(numRefs);
+    for (size_t i = 0; i < numRefs; ++i) {
+        PbiReferenceEntry& entry = referenceData.entries_[i];
+        read4(&entry.tId_);
+        read4(&entry.beginRow_);
+        read4(&entry.endRow_);
+        if (fp->is_be) {
+            entry.tId_      = ed_swap_4(entry.tId_);
+            entry.beginRow_ = ed_swap_4(entry.beginRow_);
+            entry.endRow_   = ed_swap_4(entry.endRow_);
+        }
+    }
+}
+
+/// \brief Reads the basic-data section from an open BGZF stream.
+///
+/// The seven columns are read with LoadBgzfVector() in the order rgId_,
+/// qStart_, qEnd_, holeNumber_, readQual_, ctxtFlag_, fileOffset_. The read
+/// order is part of the file layout and must not be changed.
+///
+/// \param[out] basicData  receives the loaded columns
+/// \param[in]  numReads   number of elements per column; asserted > 0
+/// \param[in]  fp         open BGZF stream
+///
+void PbiIndexIO::LoadBasicData(PbiRawBasicData& basicData,
+ const uint32_t numReads,
+ BGZF* fp)
+{
+ assert(numReads > 0);
+ (void)numReads; // quash warnings building in release mode
+
+ LoadBgzfVector(fp, basicData.rgId_, numReads);
+ LoadBgzfVector(fp, basicData.qStart_, numReads);
+ LoadBgzfVector(fp, basicData.qEnd_, numReads);
+ LoadBgzfVector(fp, basicData.holeNumber_, numReads);
+ LoadBgzfVector(fp, basicData.readQual_, numReads);
+ LoadBgzfVector(fp, basicData.ctxtFlag_, numReads);
+ LoadBgzfVector(fp, basicData.fileOffset_, numReads);
+
+ // debug-only sanity check: every column must hold numReads elements
+ assert(basicData.rgId_.size() == numReads);
+ assert(basicData.qStart_.size() == numReads);
+ assert(basicData.qEnd_.size() == numReads);
+ assert(basicData.holeNumber_.size() == numReads);
+ assert(basicData.readQual_.size() == numReads);
+ assert(basicData.ctxtFlag_.size() == numReads);
+ assert(basicData.fileOffset_.size() == numReads);
+}
+
+/// \brief Writes raw PBI data to a file.
+///
+/// The header is always written. The data sections follow only when the
+/// index holds at least one read: basic data first, then mapped, reference
+/// and barcode data, each only if the index reports having it.
+///
+/// \param[in] index     raw PBI data to write
+/// \param[in] filename  path of the output file
+/// \throws std::runtime_error if the file cannot be opened for writing
+///
+void PbiIndexIO::Save(const PbiRawData& index,
+                      const std::string& filename)
+{
+    // the handle is released by HtslibBgzfDeleter when it goes out of scope
+    std::unique_ptr<BGZF, HtslibBgzfDeleter> handle(bgzf_open(filename.c_str(), "wb"));
+    BGZF* const out = handle.get();
+    if (!out)
+        throw std::runtime_error("could not open PBI file for writing");
+
+    WriteHeader(index, out);
+
+    // an empty index consists of the header alone
+    const uint32_t readCount = index.NumReads();
+    if (readCount == 0)
+        return;
+
+    // sections are written in the order shown here
+    WriteBasicData(index.BasicData(), readCount, out);
+    if (index.HasMappedData())
+        WriteMappedData(index.MappedData(), readCount, out);
+    if (index.HasReferenceData())
+        WriteReferenceData(index.ReferenceData(), out);
+    if (index.HasBarcodeData())
+        WriteBarcodeData(index.BarcodeData(), readCount, out);
+}
+
+/// \brief Writes the barcode section to an open BGZF stream.
+///
+/// The three columns are written with WriteBgzfVector() in the order
+/// bcForward_, bcReverse_, bcQual_. The write order is part of the file
+/// layout and must not be changed.
+///
+/// \param[in] barcodeData  columns to write
+/// \param[in] numReads     expected element count per column; used only by
+///                         the debug assertions
+/// \param[in] fp           open BGZF stream
+///
+void PbiIndexIO::WriteBarcodeData(const PbiRawBarcodeData& barcodeData,
+ const uint32_t numReads,
+ BGZF* fp)
+{
+ assert(numReads > 0);
+ assert(barcodeData.bcForward_.size() == numReads);
+ assert(barcodeData.bcReverse_.size() == numReads);
+ assert(barcodeData.bcQual_.size() == numReads);
+ (void)numReads; // quash warnings building in release mode
+
+ WriteBgzfVector(fp, barcodeData.bcForward_);
+ WriteBgzfVector(fp, barcodeData.bcReverse_);
+ WriteBgzfVector(fp, barcodeData.bcQual_);
+}
+
+/// \brief Writes the PBI header to an open BGZF stream.
+///
+/// Layout written: 4-byte magic "PBI\1", 4-byte version, 2-byte section
+/// flags, 4-byte read count, then 18 zero bytes of reserved space.
+/// Multi-byte fields are byte-swapped first when fp->is_be is set.
+///
+/// \param[in] index  source of version, section flags and read count
+/// \param[in] fp     open BGZF stream
+///
+void PbiIndexIO::WriteHeader(const PbiRawData& index,
+                             BGZF* fp)
+{
+    // 'magic' string: exactly four bytes, no terminator
+    const char magic[4] = { 'P', 'B', 'I', '\1' };
+    bgzf_write(fp, magic, 4);
+
+    // version, pbi_flags, & n_reads
+    uint32_t version  = static_cast<uint32_t>(index.Version());
+    uint16_t sections = static_cast<uint16_t>(index.FileSections());
+    uint32_t reads    = index.NumReads();
+    if (fp->is_be) {
+        version  = ed_swap_4(version);
+        sections = ed_swap_2(sections);
+        reads    = ed_swap_4(reads);
+    }
+    bgzf_write(fp, &version, 4);
+    bgzf_write(fp, &sections, 2);
+    bgzf_write(fp, &reads, 4);
+
+    // reserved space, zero-filled
+    const char reserved[18] = { 0 };
+    bgzf_write(fp, reserved, 18);
+}
+
+/// \brief Writes the mapped-data section to an open BGZF stream.
+///
+/// The nine columns are written with WriteBgzfVector() in the order tId_,
+/// tStart_, tEnd_, aStart_, aEnd_, revStrand_, nM_, nMM_, mapQV_. The write
+/// order is part of the file layout and must not be changed.
+///
+/// \param[in] mappedData  columns to write
+/// \param[in] numReads    expected element count per column; used only by
+///                        the debug assertions
+/// \param[in] fp          open BGZF stream
+///
+void PbiIndexIO::WriteMappedData(const PbiRawMappedData& mappedData,
+ const uint32_t numReads,
+ BGZF* fp)
+{
+ assert(mappedData.tId_.size() == numReads);
+ assert(mappedData.tStart_.size() == numReads);
+ assert(mappedData.tEnd_.size() == numReads);
+ assert(mappedData.aStart_.size() == numReads);
+ assert(mappedData.aEnd_.size() == numReads);
+ assert(mappedData.revStrand_.size() == numReads);
+ assert(mappedData.nM_.size() == numReads);
+ assert(mappedData.nMM_.size() == numReads);
+ assert(mappedData.mapQV_.size() == numReads);
+ (void)numReads; // quash warnings building in release mode
+
+ WriteBgzfVector(fp, mappedData.tId_);
+ WriteBgzfVector(fp, mappedData.tStart_);
+ WriteBgzfVector(fp, mappedData.tEnd_);
+ WriteBgzfVector(fp, mappedData.aStart_);
+ WriteBgzfVector(fp, mappedData.aEnd_);
+ WriteBgzfVector(fp, mappedData.revStrand_);
+ WriteBgzfVector(fp, mappedData.nM_);
+ WriteBgzfVector(fp, mappedData.nMM_);
+ WriteBgzfVector(fp, mappedData.mapQV_);
+}
+
+/// \brief Writes the reference section to an open BGZF stream.
+///
+/// Layout written: a 4-byte entry count followed by three 4-byte fields per
+/// entry (tId, beginRow, endRow). All values are byte-swapped first when
+/// fp->is_be is set.
+///
+/// \param[in] referenceData  entries to write
+/// \param[in] fp             open BGZF stream
+///
+void PbiIndexIO::WriteReferenceData(const PbiRawReferenceData& referenceData,
+                                    BGZF* fp)
+{
+    // num_refs: the native value drives the loop, a separate copy goes to disk
+    const uint32_t entryCount = referenceData.entries_.size();
+    uint32_t countOnDisk = entryCount;
+    if (fp->is_be)
+        countOnDisk = ed_swap_4(countOnDisk);
+    bgzf_write(fp, &countOnDisk, 4);
+
+    // reference entries
+    for (size_t idx = 0; idx < entryCount; ++idx) {
+        const PbiReferenceEntry& entry = referenceData.entries_[idx];
+
+        // work on copies so the caller's data is never byte-swapped
+        uint32_t id    = entry.tId_;
+        uint32_t first = entry.beginRow_;
+        uint32_t last  = entry.endRow_;
+        if (fp->is_be) {
+            id    = ed_swap_4(id);
+            first = ed_swap_4(first);
+            last  = ed_swap_4(last);
+        }
+        bgzf_write(fp, &id, 4);
+        bgzf_write(fp, &first, 4);
+        bgzf_write(fp, &last, 4);
+    }
+}
+
+/// \brief Writes the basic-data section to an open BGZF stream.
+///
+/// The seven columns are written with WriteBgzfVector() in the order rgId_,
+/// qStart_, qEnd_, holeNumber_, readQual_, ctxtFlag_, fileOffset_. The write
+/// order is part of the file layout and must not be changed.
+///
+/// \param[in] basicData  columns to write
+/// \param[in] numReads   expected element count per column; used only by
+///                       the debug assertions
+/// \param[in] fp         open BGZF stream
+///
+void PbiIndexIO::WriteBasicData(const PbiRawBasicData& basicData,
+ const uint32_t numReads,
+ BGZF* fp)
+{
+ assert(basicData.rgId_.size() == numReads);
+ assert(basicData.qStart_.size() == numReads);
+ assert(basicData.qEnd_.size() == numReads);
+ assert(basicData.holeNumber_.size() == numReads);
+ assert(basicData.readQual_.size() == numReads);
+ assert(basicData.ctxtFlag_.size() == numReads);
+ assert(basicData.fileOffset_.size() == numReads);
+ (void)numReads; // quash warnings building in release mode
+
+ WriteBgzfVector(fp, basicData.rgId_);
+ WriteBgzfVector(fp, basicData.qStart_);
+ WriteBgzfVector(fp, basicData.qEnd_);
+ WriteBgzfVector(fp, basicData.holeNumber_);
+ WriteBgzfVector(fp, basicData.readQual_);
+ WriteBgzfVector(fp, basicData.ctxtFlag_);
+ WriteBgzfVector(fp, basicData.fileOffset_);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// Author: Derek Barnett
+
+#ifndef PBIINDEXIO_H
+#define PBIINDEXIO_H
+
+#include "pbbam/BamFile.h"
+#include "pbbam/DataSet.h"
+#include "pbbam/PbiFile.h"
+#include "pbbam/PbiRawData.h"
+#include <htslib/bgzf.h>
+#include <htslib/sam.h>
+#include <cassert>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief Collection of static helpers for reading and writing PBI index
+///        data through BGZF file handles.
+///
+/// Only the templated members (LoadBgzfVector, WriteBgzfVector,
+/// SwapEndianness) are defined in this header; all other members are merely
+/// declared here.
+class PbiIndexIO
+{
+public:
+ // top-level entry points
+ static PbiRawData Load(const std::string& filename);
+ static void Load(PbiRawData& rawData, const std::string& filename);
+ static void LoadFromDataSet(PbiRawData& aggregateData, const DataSet& dataset);
+ static void Save(const PbiRawData& rawData, const std::string& filename);
+
+public:
+ // per-component load
+ // (each takes the destination component and the BGZF handle to read from;
+ // the per-read components additionally take the number of reads)
+ static void LoadBarcodeData(PbiRawBarcodeData& barcodeData,
+ const uint32_t numReads,
+ BGZF* fp);
+ static void LoadHeader(PbiRawData& index,
+ BGZF* fp);
+ static void LoadMappedData(PbiRawMappedData& mappedData,
+ const uint32_t numReads,
+ BGZF* fp);
+ static void LoadReferenceData(PbiRawReferenceData& referenceData,
+ BGZF* fp);
+ static void LoadBasicData(PbiRawBasicData& basicData,
+ const uint32_t numReads,
+ BGZF* fp);
+
+ // per-data-field load
+ // Resizes 'data' to numReads elements and fills it with
+ // numReads*sizeof(T) bytes read from 'fp'; elements are byte-swapped
+ // when fp->is_be is set.
+ template<typename T>
+ static void LoadBgzfVector(BGZF* fp,
+ std::vector<T>& data,
+ const uint32_t numReads);
+
+public:
+ // per-component write
+ // (mirror images of the per-component load functions above)
+ static void WriteBarcodeData(const PbiRawBarcodeData& barcodeData,
+ const uint32_t numReads,
+ BGZF* fp);
+ static void WriteHeader(const PbiRawData& index,
+ BGZF* fp);
+ static void WriteMappedData(const PbiRawMappedData& mappedData,
+ const uint32_t numReads,
+ BGZF* fp);
+ static void WriteReferenceData(const PbiRawReferenceData& referenceData,
+ BGZF* fp);
+ static void WriteBasicData(const PbiRawBasicData& subreadData,
+ const uint32_t numReads,
+ BGZF* fp);
+
+ // per-data-field write
+ // Writes the raw bytes of 'data' to 'fp'; elements are byte-swapped
+ // first when fp->is_be is set. The input vector itself is not modified.
+ template<typename T>
+ static void WriteBgzfVector(BGZF* fp,
+ const std::vector<T>& data);
+
+private:
+ // helper functions
+ // Byte-swaps every element of 'data' in place. Supports element sizes of
+ // 1, 2, 4 and 8 bytes; throws std::runtime_error for any other size.
+ template<typename T>
+ static void SwapEndianness(std::vector<T>& data);
+};
+
+/// \brief Reads \p numReads elements of type T from \p fp into \p data.
+///
+/// \p data is resized to exactly \p numReads elements. When the BGZF handle
+/// reports a big-endian host (fp->is_be), the elements are byte-swapped after
+/// reading.
+///
+/// \param[in]  fp        open BGZF handle (must not be null)
+/// \param[out] data      destination vector, overwritten
+/// \param[in]  numReads  number of elements to read
+///
+/// \throws std::runtime_error if fewer bytes than requested could be read
+///
+template<typename T>
+inline void PbiIndexIO::LoadBgzfVector(BGZF* fp,
+                                       std::vector<T>& data,
+                                       const uint32_t numReads)
+{
+    assert(fp);
+    data.resize(numReads);
+
+    // nothing to fetch - also avoids touching the storage of an empty vector
+    if (numReads == 0)
+        return;
+
+    const size_t numBytes = static_cast<size_t>(numReads) * sizeof(T);
+    const auto bytesRead = bgzf_read(fp, data.data(), numBytes);
+
+    // a negative result is an error, a short count is a truncated file;
+    // either way the vector contents cannot be trusted
+    if (bytesRead < 0 || static_cast<size_t>(bytesRead) != numBytes)
+        throw std::runtime_error("could not read expected amount of data from PBI file");
+
+    if (fp->is_be)
+        SwapEndianness(data);
+}
+
+/// \brief Reverses the byte order of every element of \p data, in place.
+///
+/// Dispatches on sizeof(T): single-byte elements are left untouched; 2-, 4-
+/// and 8-byte elements are swapped with the matching ed_swap_*p helper.
+///
+/// \throws std::runtime_error for any other element size
+///
+template<typename T>
+inline void PbiIndexIO::SwapEndianness(std::vector<T>& data)
+{
+    switch (sizeof(T)) {
+
+        // single bytes have no byte order
+        case 1 :
+            return;
+
+        case 2 :
+            for (T& element : data)
+                ed_swap_2p(&element);
+            return;
+
+        case 4 :
+            for (T& element : data)
+                ed_swap_4p(&element);
+            return;
+
+        case 8 :
+            for (T& element : data)
+                ed_swap_8p(&element);
+            return;
+
+        default:
+            throw std::runtime_error("unsupported element size");
+    }
+}
+
+/// \brief Writes the raw bytes of \p data to \p fp.
+///
+/// When the BGZF handle reports a big-endian host (fp->is_be), a byte-swapped
+/// copy is written instead; \p data itself is never modified. On
+/// little-endian hosts the input buffer is written directly, without copying.
+///
+/// \param[in] fp    open BGZF handle (must not be null)
+/// \param[in] data  elements to write
+///
+/// \throws std::runtime_error if not all bytes could be written
+///
+template<typename T>
+inline void PbiIndexIO::WriteBgzfVector(BGZF* fp,
+                                        const std::vector<T>& data)
+{
+    assert(fp);
+
+    // nothing to store - also avoids touching the storage of an empty vector
+    if (data.empty())
+        return;
+
+    const size_t numBytes = data.size() * sizeof(T);
+
+    // only pay for a copy when the bytes actually need to be reordered
+    std::vector<T> swapped;
+    const T* buffer = data.data();
+    if (fp->is_be) {
+        swapped = data;
+        SwapEndianness(swapped);
+        buffer = swapped.data();
+    }
+
+    const auto bytesWritten = bgzf_write(fp, buffer, numBytes);
+    if (bytesWritten < 0 || static_cast<size_t>(bytesWritten) != numBytes)
+        throw std::runtime_error("could not write expected amount of data to PBI file");
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // PBIINDEXIO_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiIndexedBamReader.cpp
+/// \brief Implements the PbiIndexedBamReader class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiIndexedBamReader.h"
+#include <htslib/bgzf.h>
+
+#include <iostream>
+
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Implementation detail of PbiIndexedBamReader: holds the loaded PBI data,
+// the active filter, and the queue of record blocks still to be read.
+struct PbiIndexedBamReaderPrivate
+{
+public:
+    // Loads the PBI data from 'pbiFilename'; no blocks are selected until
+    // Filter() is called.
+    PbiIndexedBamReaderPrivate(const string& pbiFilename)
+        : index_(pbiFilename)
+        , currentBlockReadCount_(0)
+    { }
+
+    // Looks up, for every pending block, the stored file offset of the
+    // block's first record.
+    void ApplyOffsets(void)
+    {
+        const std::vector<int64_t>& offsets = index_.BasicData().fileOffset_;
+        for (IndexResultBlock& pending : blocks_)
+            pending.virtualOffset_ = offsets.at(pending.firstIndex_);
+    }
+
+    // Stores 'filter' and rebuilds the list of blocks that pass it.
+    void Filter(const PbiFilter& filter)
+    {
+        // remember the request and start from a clean slate
+        filter_ = filter;
+        currentBlockReadCount_ = 0;
+        blocks_.clear();
+
+        const uint32_t numReads = index_.NumReads();
+
+        // empty PBI - no reads to use, nothing to look up
+        if (numReads == 0)
+            return;
+
+        if (filter_.IsEmpty()) {
+            // empty filter - one block spanning all reads
+            blocks_.push_back(IndexResultBlock{0, numReads});
+        }
+        else {
+            // collect the row numbers accepted by the filter, then let
+            // mergedIndexBlocks turn them into blocks
+            IndexList accepted;
+            accepted.reserve(numReads);
+            for (size_t row = 0; row < numReads; ++row) {
+                if (filter_.Accepts(index_, row))
+                    accepted.push_back(row);
+            }
+            blocks_ = mergedIndexBlocks(std::move(accepted));
+        }
+
+        ApplyOffsets();
+    }
+
+    // Reads the next record of the front block into 'b'.
+    // Returns -1 when no blocks remain, otherwise the result of bam_read1.
+    // Throws std::runtime_error if seeking to a new block fails.
+    int ReadRawData(BGZF* bgzf, bam1_t* b)
+    {
+        // no data left to fetch
+        if (blocks_.empty())
+            return -1; // "EOF"
+
+        // starting a new block: jump to its first record
+        const bool atBlockStart = (currentBlockReadCount_ == 0);
+        if (atBlockStart) {
+            if (bgzf_seek(bgzf, blocks_.at(0).virtualOffset_, SEEK_SET) == -1)
+                throw std::runtime_error("could not seek in BAM file");
+        }
+
+        const auto status = bam_read1(bgzf, b);
+
+        // count the record; once the block is exhausted, drop it and reset
+        if (++currentBlockReadCount_ == blocks_.at(0).numReads_) {
+            blocks_.pop_front();
+            currentBlockReadCount_ = 0;
+        }
+
+        return status;
+    }
+
+public:
+    PbiFilter filter_;
+    PbiRawData index_;
+    IndexResultBlocks blocks_;
+    size_t currentBlockReadCount_;
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// Filter + filename: delegates to the (filter, BamFile) overload using a
+// BamFile constructed from 'filename'.
+PbiIndexedBamReader::PbiIndexedBamReader(const PbiFilter& filter,
+ const std::string& filename)
+ : PbiIndexedBamReader(filter, BamFile(filename))
+{ }
+
+// Filter + BamFile (copied): sets up the reader via the BamFile-only
+// constructor, then applies 'filter'.
+PbiIndexedBamReader::PbiIndexedBamReader(const PbiFilter& filter,
+ const BamFile& bamFile)
+ : PbiIndexedBamReader(bamFile)
+{
+ Filter(filter);
+}
+
+// Filter + BamFile (moved): same as above, but 'bamFile' is moved from.
+PbiIndexedBamReader::PbiIndexedBamReader(const PbiFilter& filter,
+ BamFile&& bamFile)
+ : PbiIndexedBamReader(std::move(bamFile))
+{
+ Filter(filter);
+}
+
+// Filename only: delegates to the BamFile overload. No filter is applied
+// here.
+PbiIndexedBamReader::PbiIndexedBamReader(const std::string& bamFilename)
+ : PbiIndexedBamReader(BamFile(bamFilename))
+{ }
+
+// BamFile (copied): initializes the BamReader base, then creates the private
+// implementation from the file's PacBioIndexFilename().
+PbiIndexedBamReader::PbiIndexedBamReader(const BamFile& bamFile)
+ : BamReader(bamFile)
+ , d_(new internal::PbiIndexedBamReaderPrivate(File().PacBioIndexFilename()))
+{ }
+
+// BamFile (moved): as above. The index filename is taken from File() after
+// the base has been initialized, not from the moved-from argument.
+PbiIndexedBamReader::PbiIndexedBamReader(BamFile&& bamFile)
+ : BamReader(std::move(bamFile))
+ , d_(new internal::PbiIndexedBamReaderPrivate(File().PacBioIndexFilename()))
+{ }
+
+PbiIndexedBamReader::~PbiIndexedBamReader(void) { }
+
+// Fetches the next record by forwarding to the private implementation.
+// Returns whatever PbiIndexedBamReaderPrivate::ReadRawData returns.
+int PbiIndexedBamReader::ReadRawData(BGZF* bgzf, bam1_t* b)
+{
+ assert(d_);
+ return d_->ReadRawData(bgzf, b);
+}
+
+// Returns the filter currently stored in the private implementation.
+const PbiFilter& PbiIndexedBamReader::Filter(void) const
+{
+ assert(d_);
+ return d_->filter_;
+}
+
+// Applies a new filter via the private implementation.
+// Returns *this so calls can be chained.
+PbiIndexedBamReader& PbiIndexedBamReader::Filter(const PbiFilter& filter)
+{
+ assert(d_);
+ d_->Filter(filter);
+ return *this;
+}
+
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file PbiRawData.cpp
+/// \brief Implements the classes used for working with raw PBI data.
+//
+// Author: Derek Barnett
+
+#include "pbbam/PbiRawData.h"
+#include "pbbam/BamFile.h"
+#include "pbbam/BamRecord.h"
+#include "PbiIndexIO.h"
+#include <boost/numeric/conversion/cast.hpp>
+#include <map>
+#include <cassert>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Returns the upper-case name of a RecordType.
+// Throws std::runtime_error for any value that has no name listed here.
+static
+string ToString(const RecordType type)
+{
+    switch (type) {
+        case RecordType::ZMW      : return "ZMW";
+        case RecordType::HQREGION : return "HQREGION";
+        case RecordType::SUBREAD  : return "SUBREAD";
+        case RecordType::CCS      : return "CCS";
+        case RecordType::SCRAP    : return "SCRAP";
+        case RecordType::UNKNOWN  : return "UNKNOWN";
+        default:
+            throw std::runtime_error("error: unknown RecordType encountered");
+    }
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// ----------------------------------
+// PbiRawBarcodeData implementation
+// ----------------------------------
+
+// Default: all three barcode columns start out empty.
+PbiRawBarcodeData::PbiRawBarcodeData(void) { }
+
+// Reserves capacity for 'numReads' entries in each column. Only capacity is
+// reserved; no elements are added.
+PbiRawBarcodeData::PbiRawBarcodeData(uint32_t numReads)
+{
+ bcForward_.reserve(numReads);
+ bcReverse_.reserve(numReads);
+ bcQual_.reserve(numReads);
+}
+
+// Copy constructor: column-by-column copy.
+PbiRawBarcodeData::PbiRawBarcodeData(const PbiRawBarcodeData& other)
+ : bcForward_(other.bcForward_)
+ , bcReverse_(other.bcReverse_)
+ , bcQual_(other.bcQual_)
+{ }
+
+// Move constructor: column-by-column move.
+PbiRawBarcodeData::PbiRawBarcodeData(PbiRawBarcodeData&& other)
+ : bcForward_(std::move(other.bcForward_))
+ , bcReverse_(std::move(other.bcReverse_))
+ , bcQual_(std::move(other.bcQual_))
+{ }
+
+// Copy assignment: column-by-column copy.
+PbiRawBarcodeData& PbiRawBarcodeData::operator=(const PbiRawBarcodeData& other)
+{
+ bcForward_ = other.bcForward_;
+ bcReverse_ = other.bcReverse_;
+ bcQual_ = other.bcQual_;
+ return *this;
+}
+
+// Move assignment: column-by-column move.
+PbiRawBarcodeData& PbiRawBarcodeData::operator=(PbiRawBarcodeData&& other)
+{
+ bcForward_ = std::move(other.bcForward_);
+ bcReverse_ = std::move(other.bcReverse_);
+ bcQual_ = std::move(other.bcQual_);
+ return *this;
+}
+
+// Appends one entry per column for record 'b'.
+//
+// The record's values are stored only when it carries both barcodes and a
+// barcode quality, and all three values are non-negative. In every other case
+// -1 is stored in all three columns.
+void PbiRawBarcodeData::AddRecord(const BamRecord& b)
+{
+    const bool hasBarcodeInfo = b.HasBarcodes() && b.HasBarcodeQuality();
+    if (hasBarcodeInfo) {
+
+        // pull the values out of the record
+        const auto forwardAndReverse = b.Barcodes();
+        const auto rawQuality = b.BarcodeQuality();
+        const auto quality = boost::numeric_cast<int8_t>(rawQuality);
+
+        // keep them only if none of the three is negative
+        const bool anyNegative = (forwardAndReverse.first < 0 ||
+                                  forwardAndReverse.second < 0 ||
+                                  quality < 0);
+        if (!anyNegative) {
+            bcForward_.push_back(forwardAndReverse.first);
+            bcReverse_.push_back(forwardAndReverse.second);
+            bcQual_.push_back(quality);
+            return;
+        }
+    }
+
+    // at least one value is missing or negative: store placeholders
+    bcForward_.push_back(-1);
+    bcReverse_.push_back(-1);
+    bcQual_.push_back(-1);
+}
+
+// ----------------------------------
+// PbiRawMappedData implementation
+// ----------------------------------
+
+// Default: all mapped-data columns start out empty.
+PbiRawMappedData::PbiRawMappedData(void) { }
+
+// Reserves capacity for 'numReads' entries in each column. Only capacity is
+// reserved; no elements are added.
+PbiRawMappedData::PbiRawMappedData(uint32_t numReads)
+{
+ tId_.reserve(numReads);
+ tStart_.reserve(numReads);
+ tEnd_.reserve(numReads);
+ aStart_.reserve(numReads);
+ aEnd_.reserve(numReads);
+ revStrand_.reserve(numReads);
+ nM_.reserve(numReads);
+ nMM_.reserve(numReads);
+ mapQV_.reserve(numReads);
+}
+
+// Copy constructor: column-by-column copy.
+PbiRawMappedData::PbiRawMappedData(const PbiRawMappedData& other)
+ : tId_(other.tId_)
+ , tStart_(other.tStart_)
+ , tEnd_(other.tEnd_)
+ , aStart_(other.aStart_)
+ , aEnd_(other.aEnd_)
+ , revStrand_(other.revStrand_)
+ , nM_(other.nM_)
+ , nMM_(other.nMM_)
+ , mapQV_(other.mapQV_)
+{ }
+
+// Move constructor: column-by-column move.
+PbiRawMappedData::PbiRawMappedData(PbiRawMappedData&& other)
+ : tId_(std::move(other.tId_))
+ , tStart_(std::move(other.tStart_))
+ , tEnd_(std::move(other.tEnd_))
+ , aStart_(std::move(other.aStart_))
+ , aEnd_(std::move(other.aEnd_))
+ , revStrand_(std::move(other.revStrand_))
+ , nM_(std::move(other.nM_))
+ , nMM_(std::move(other.nMM_))
+ , mapQV_(std::move(other.mapQV_))
+{ }
+
+// Copy assignment: column-by-column copy.
+PbiRawMappedData& PbiRawMappedData::operator=(const PbiRawMappedData& other)
+{
+ tId_ = other.tId_;
+ tStart_ = other.tStart_;
+ tEnd_ = other.tEnd_;
+ aStart_ = other.aStart_;
+ aEnd_ = other.aEnd_;
+ revStrand_ = other.revStrand_;
+ nM_ = other.nM_;
+ nMM_ = other.nMM_;
+ mapQV_ = other.mapQV_;
+ return *this;
+}
+
+// Move assignment: column-by-column move.
+PbiRawMappedData& PbiRawMappedData::operator=(PbiRawMappedData&& other)
+{
+ tId_ = std::move(other.tId_);
+ tStart_ = std::move(other.tStart_);
+ tEnd_ = std::move(other.tEnd_);
+ aStart_ = std::move(other.aStart_);
+ aEnd_ = std::move(other.aEnd_);
+ revStrand_ = std::move(other.revStrand_);
+ nM_ = std::move(other.nM_);
+ nMM_ = std::move(other.nMM_);
+ mapQV_ = std::move(other.mapQV_);
+ return *this;
+}
+
+// Appends one entry per mapped-data column, taken from record 'b'.
+void PbiRawMappedData::AddRecord(const BamRecord& b)
+{
+    // reference ID and reference start/end
+    tId_.push_back(b.ReferenceId());
+    tStart_.push_back(b.ReferenceStart());
+    tEnd_.push_back(b.ReferenceEnd());
+
+    // aligned start/end
+    aStart_.push_back(b.AlignedStart());
+    aEnd_.push_back(b.AlignedEnd());
+
+    // strand is stored as a flag: 1 for reverse, 0 otherwise
+    const bool onReverseStrand = (b.AlignedStrand() == Strand::REVERSE);
+    revStrand_.push_back( (onReverseStrand ? 1 : 0) );
+
+    mapQV_.push_back(b.MapQuality());
+
+    // match / mismatch counts come back as a (first, second) pair
+    const auto counts = b.NumMatchesAndMismatches();
+    nM_.push_back(counts.first);
+    nMM_.push_back(counts.second);
+}
+
+// Deleted-base count for one record: first half of the combined result.
+uint32_t PbiRawMappedData::NumDeletedBasesAt(size_t recordIndex) const
+{
+    return NumDeletedAndInsertedBasesAt(recordIndex).first;
+}
+
+// Derives (numDeleted, numInserted) for the record at 'recordIndex' from the
+// stored columns:
+//   inserted = (aEnd - aStart) - nM - nMM
+//   deleted  = (tEnd - tStart) - nM - nMM
+// Column access is bounds-checked (.at()).
+std::pair<uint32_t, uint32_t> PbiRawMappedData::NumDeletedAndInsertedBasesAt(size_t recordIndex) const
+{
+    const auto alignedBegin   = aStart_.at(recordIndex);
+    const auto alignedEnd     = aEnd_.at(recordIndex);
+    const auto referenceBegin = tStart_.at(recordIndex);
+    const auto referenceEnd   = tEnd_.at(recordIndex);
+    const auto matches        = nM_.at(recordIndex);
+    const auto mismatches     = nMM_.at(recordIndex);
+
+    const auto alignedSpan   = alignedEnd - alignedBegin;
+    const auto referenceSpan = referenceEnd - referenceBegin;
+
+    const auto inserted = (alignedSpan - matches - mismatches);
+    const auto deleted  = (referenceSpan - matches - mismatches);
+    return std::make_pair(deleted, inserted);
+}
+
+// Inserted-base count for one record: second half of the combined result.
+uint32_t PbiRawMappedData::NumInsertedBasesAt(size_t recordIndex) const
+{
+    return NumDeletedAndInsertedBasesAt(recordIndex).second;
+}
+
+// ------------------------------------
+// PbiReferenceEntry implementation
+// ------------------------------------
+
+// Sentinel values: -1 converted to the respective ID / Row type.
+const PbiReferenceEntry::ID PbiReferenceEntry::UNMAPPED_ID = static_cast<PbiReferenceEntry::ID>(-1);
+const PbiReferenceEntry::Row PbiReferenceEntry::UNSET_ROW = static_cast<PbiReferenceEntry::Row>(-1);
+
+// Default: unmapped ID, both rows unset.
+PbiReferenceEntry::PbiReferenceEntry(void)
+ : tId_(UNMAPPED_ID)
+ , beginRow_(UNSET_ROW)
+ , endRow_(UNSET_ROW)
+{ }
+
+// ID only: both rows are left unset.
+PbiReferenceEntry::PbiReferenceEntry(ID id)
+ : tId_(id)
+ , beginRow_(UNSET_ROW)
+ , endRow_(UNSET_ROW)
+{ }
+
+// Fully specified entry. The arguments are stored as given; no validation is
+// performed here.
+PbiReferenceEntry::PbiReferenceEntry(ID id, Row beginRow, Row endRow)
+ : tId_(id)
+ , beginRow_(beginRow)
+ , endRow_(endRow)
+{ }
+
+// Copy constructor: member-by-member copy.
+PbiReferenceEntry::PbiReferenceEntry(const PbiReferenceEntry& other)
+ : tId_(other.tId_)
+ , beginRow_(other.beginRow_)
+ , endRow_(other.endRow_)
+{ }
+
+// Move constructor: member-by-member move.
+PbiReferenceEntry::PbiReferenceEntry(PbiReferenceEntry&& other)
+ : tId_(std::move(other.tId_))
+ , beginRow_(std::move(other.beginRow_))
+ , endRow_(std::move(other.endRow_))
+{ }
+
+// Copy assignment: member-by-member copy.
+PbiReferenceEntry& PbiReferenceEntry::operator=(const PbiReferenceEntry& other)
+{
+ tId_ = other.tId_;
+ beginRow_ = other.beginRow_;
+ endRow_ = other.endRow_;
+ return *this;
+}
+
+// Move assignment: member-by-member move.
+PbiReferenceEntry& PbiReferenceEntry::operator=(PbiReferenceEntry&& other)
+{
+ tId_ = std::move(other.tId_);
+ beginRow_ = std::move(other.beginRow_);
+ endRow_ = std::move(other.endRow_);
+ return *this;
+}
+
+// ------------------------------------
+// PbiRawReferenceData implementation
+// ------------------------------------
+
+// Default: no reference entries.
+PbiRawReferenceData::PbiRawReferenceData(void) { }
+
+// Reserves capacity for 'numRefs' entries. Only capacity is reserved; no
+// entries are added.
+PbiRawReferenceData::PbiRawReferenceData(uint32_t numRefs)
+{ entries_.reserve(numRefs); }
+
+// Copy constructor: copies the entry list.
+PbiRawReferenceData::PbiRawReferenceData(const PbiRawReferenceData& other)
+ : entries_(other.entries_)
+{ }
+
+// Move constructor: takes over the entry list of 'other'.
+PbiRawReferenceData::PbiRawReferenceData(PbiRawReferenceData&& other)
+ : entries_(std::move(other.entries_))
+{ }
+
+// Copy assignment: copies the entry list.
+PbiRawReferenceData& PbiRawReferenceData::operator=(const PbiRawReferenceData& other)
+{
+ entries_ = other.entries_;
+ return *this;
+}
+
+// Move assignment: takes over the entry list of 'other'.
+PbiRawReferenceData& PbiRawReferenceData::operator=(PbiRawReferenceData&& other)
+{
+ entries_ = std::move(other.entries_);
+ return *this;
+}
+
+// ----------------------------------
+// PbiRawBasicData implementation
+// ----------------------------------
+
+// Default: all basic-data columns start out empty.
+PbiRawBasicData::PbiRawBasicData(void) { }
+
+// Reserves capacity for 'numReads' entries in each column. Only capacity is
+// reserved; no elements are added.
+PbiRawBasicData::PbiRawBasicData(uint32_t numReads)
+{
+ rgId_.reserve(numReads);
+ qStart_.reserve(numReads);
+ qEnd_.reserve(numReads);
+ holeNumber_.reserve(numReads);
+ readQual_.reserve(numReads);
+ ctxtFlag_.reserve(numReads);
+ fileOffset_.reserve(numReads);
+ fileNumber_.reserve(numReads);
+}
+
+// Copy constructor: column-by-column copy.
+PbiRawBasicData::PbiRawBasicData(const PbiRawBasicData& other)
+ : rgId_(other.rgId_)
+ , qStart_(other.qStart_)
+ , qEnd_(other.qEnd_)
+ , holeNumber_(other.holeNumber_)
+ , readQual_(other.readQual_)
+ , ctxtFlag_(other.ctxtFlag_)
+ , fileOffset_(other.fileOffset_)
+ , fileNumber_(other.fileNumber_)
+{ }
+
+// Move constructor: column-by-column move.
+PbiRawBasicData::PbiRawBasicData(PbiRawBasicData&& other)
+ : rgId_(std::move(other.rgId_))
+ , qStart_(std::move(other.qStart_))
+ , qEnd_(std::move(other.qEnd_))
+ , holeNumber_(std::move(other.holeNumber_))
+ , readQual_(std::move(other.readQual_))
+ , ctxtFlag_(std::move(other.ctxtFlag_))
+ , fileOffset_(std::move(other.fileOffset_))
+ , fileNumber_(std::move(other.fileNumber_))
+{ }
+
+// Copy assignment: column-by-column copy.
+PbiRawBasicData& PbiRawBasicData::operator=(const PbiRawBasicData& other)
+{
+ rgId_ = other.rgId_;
+ qStart_ = other.qStart_;
+ qEnd_ = other.qEnd_;
+ holeNumber_ = other.holeNumber_;
+ readQual_ = other.readQual_;
+ ctxtFlag_ = other.ctxtFlag_;
+ fileOffset_ = other.fileOffset_;
+ fileNumber_ = other.fileNumber_;
+ return *this;
+}
+
+// Move assignment: column-by-column move.
+PbiRawBasicData& PbiRawBasicData::operator=(PbiRawBasicData&& other)
+{
+ rgId_ = std::move(other.rgId_);
+ qStart_ = std::move(other.qStart_);
+ qEnd_ = std::move(other.qEnd_);
+ holeNumber_ = std::move(other.holeNumber_);
+ readQual_ = std::move(other.readQual_);
+ ctxtFlag_ = std::move(other.ctxtFlag_);
+ fileOffset_ = std::move(other.fileOffset_);
+ fileNumber_ = std::move(other.fileNumber_);
+ return *this;
+}
+
+// Appends one entry per basic-data column for record 'b'.
+//
+// 'offset' is stored as the record's file offset. The file number is always
+// stored as 0.
+void PbiRawBasicData::AddRecord(const BamRecord& b, int64_t offset)
+{
+    // read group ID: fall back to one built from movie name & record type
+    // when the record carries none, then parse the hex string
+    auto readGroup = b.ReadGroupId();
+    if (readGroup.empty())
+        readGroup = MakeReadGroupId(b.MovieName(), internal::ToString(b.Type()));
+    const uint32_t unsignedId = std::stoul(readGroup, nullptr, 16);
+    rgId_.push_back(static_cast<int32_t>(unsignedId));
+
+    // query start/end: CCS records store -1 for both
+    const bool isCcs = (b.Type() == RecordType::CCS);
+    if (isCcs) {
+        qStart_.push_back(-1);
+        qEnd_.push_back(-1);
+    } else {
+        qStart_.push_back(b.QueryStart());
+        qEnd_.push_back(b.QueryEnd());
+    }
+
+    // hole number, defaulting to 0 when absent
+    if (b.HasHoleNumber())
+        holeNumber_.push_back(b.HoleNumber());
+    else
+        holeNumber_.push_back(0);
+
+    // read accuracy, defaulting to 0 when absent
+    if (b.HasReadAccuracy())
+        readQual_.push_back(static_cast<float>(b.ReadAccuracy()));
+    else
+        readQual_.push_back(0.0f);
+
+    // local context flags, defaulting to NO_LOCAL_CONTEXT when absent
+    if (b.HasLocalContextFlags())
+        ctxtFlag_.push_back(b.LocalContextFlags());
+    else
+        ctxtFlag_.push_back(LocalContextFlags::NO_LOCAL_CONTEXT);
+
+    // caller-provided offset of the record start
+    fileOffset_.push_back(offset);
+
+    // default file number
+    fileNumber_.push_back(0);
+}
+
+// ----------------------------------
+// PbiRawData implementation
+// ----------------------------------
+
+// Default: current PBI version, all sections flagged, zero reads.
+PbiRawData::PbiRawData(void)
+ : version_(PbiFile::CurrentVersion)
+ , sections_(PbiFile::ALL)
+ , numReads_(0)
+{ }
+
+// From file: starts from the same defaults as above, records the filename,
+// then hands *this to PbiIndexIO::Load to be filled in.
+PbiRawData::PbiRawData(const string& pbiFilename)
+ : filename_(pbiFilename)
+ , version_(PbiFile::CurrentVersion)
+ , sections_(PbiFile::ALL)
+ , numReads_(0)
+{
+ internal::PbiIndexIO::Load(*this, pbiFilename);
+}
+
+// From dataset: sections start as BASIC | MAPPED | BARCODE and no filename is
+// recorded; *this is then handed to PbiIndexIO::LoadFromDataSet.
+PbiRawData::PbiRawData(const DataSet& dataset)
+ : version_(PbiFile::CurrentVersion)
+ , sections_(PbiFile::BASIC | PbiFile::MAPPED | PbiFile::BARCODE)
+ , numReads_(0)
+{
+ internal::PbiIndexIO::LoadFromDataSet(*this, dataset);
+}
+
+// Copy constructor: member-by-member copy.
+PbiRawData::PbiRawData(const PbiRawData& other)
+ : filename_(other.filename_)
+ , version_(other.version_)
+ , sections_(other.sections_)
+ , numReads_(other.numReads_)
+ , barcodeData_(other.barcodeData_)
+ , mappedData_(other.mappedData_)
+ , referenceData_(other.referenceData_)
+ , basicData_(other.basicData_)
+{ }
+
+// Move constructor: member-by-member move.
+PbiRawData::PbiRawData(PbiRawData&& other)
+ : filename_(std::move(other.filename_))
+ , version_(std::move(other.version_))
+ , sections_(std::move(other.sections_))
+ , numReads_(std::move(other.numReads_))
+ , barcodeData_(std::move(other.barcodeData_))
+ , mappedData_(std::move(other.mappedData_))
+ , referenceData_(std::move(other.referenceData_))
+ , basicData_(std::move(other.basicData_))
+{ }
+
+// Copy assignment: member-by-member copy.
+PbiRawData& PbiRawData::operator=(const PbiRawData& other)
+{
+ filename_ = other.filename_;
+ version_ = other.version_;
+ sections_ = other.sections_;
+ numReads_ = other.numReads_;
+ barcodeData_ = other.barcodeData_;
+ mappedData_ = other.mappedData_;
+ referenceData_ = other.referenceData_;
+ basicData_ = other.basicData_;
+ return *this;
+}
+
+// Move assignment: member-by-member move.
+PbiRawData& PbiRawData::operator=(PbiRawData&& other)
+{
+ filename_ = std::move(other.filename_);
+ version_ = std::move(other.version_);
+ sections_ = std::move(other.sections_);
+ numReads_ = std::move(other.numReads_);
+ barcodeData_ = std::move(other.barcodeData_);
+ mappedData_ = std::move(other.mappedData_);
+ referenceData_ = std::move(other.referenceData_);
+ basicData_ = std::move(other.basicData_);
+ return *this;
+}
+
+PbiRawData::~PbiRawData(void) { }
--- /dev/null
+
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ProgramInfo.cpp
+/// \brief Implements the ProgramInfo class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/ProgramInfo.h"
+#include "SequenceUtils.h"
+#include <sstream>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// Two-letter field tags used when parsing & printing the program header line.
+// These are only ever read, so they are declared const to rule out accidental
+// modification.
+static const string token_ID("ID");
+static const string token_CL("CL");
+static const string token_DS("DS");
+static const string token_PN("PN");
+static const string token_PP("PP");
+static const string token_VN("VN");
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+// Default: all fields empty.
+ProgramInfo::ProgramInfo(void) { }
+
+// Creates an entry with the given ID; all other fields are empty.
+ProgramInfo::ProgramInfo(const std::string& id)
+    : id_(id)
+{ }
+
+// Copy constructor. Copies every field, including the custom tags
+// (previously custom_ was left out, so copies silently lost them).
+ProgramInfo::ProgramInfo(const ProgramInfo& other)
+    : commandLine_(other.commandLine_)
+    , description_(other.description_)
+    , id_(other.id_)
+    , name_(other.name_)
+    , previousProgramId_(other.previousProgramId_)
+    , version_(other.version_)
+    , custom_(other.custom_)
+{ }
+
+// Move constructor. Moves every field, including the custom tags.
+ProgramInfo::ProgramInfo(ProgramInfo&& other)
+    : commandLine_(std::move(other.commandLine_))
+    , description_(std::move(other.description_))
+    , id_(std::move(other.id_))
+    , name_(std::move(other.name_))
+    , previousProgramId_(std::move(other.previousProgramId_))
+    , version_(std::move(other.version_))
+    , custom_(std::move(other.custom_))
+{ }
+
+ProgramInfo::~ProgramInfo(void) { }
+
+// Copy assignment. Copies every field, including the custom tags.
+ProgramInfo& ProgramInfo::operator=(const ProgramInfo& other)
+{
+    commandLine_ = other.commandLine_;
+    description_ = other.description_;
+    id_ = other.id_;
+    name_ = other.name_;
+    previousProgramId_ = other.previousProgramId_;
+    version_ = other.version_;
+    custom_ = other.custom_;
+    return *this;
+}
+
+// Move assignment. Moves every field, including the custom tags.
+ProgramInfo& ProgramInfo::operator=(ProgramInfo&& other)
+{
+    commandLine_ = std::move(other.commandLine_);
+    description_ = std::move(other.description_);
+    id_ = std::move(other.id_);
+    name_ = std::move(other.name_);
+    previousProgramId_ = std::move(other.previousProgramId_);
+    version_ = std::move(other.version_);
+    custom_ = std::move(other.custom_);
+    return *this;
+}
+
+// Builds a ProgramInfo from one "@PG" header line.
+//
+// The first 4 characters ("@PG" plus the tab) are dropped and the remainder is
+// split on tabs. Each field is expected to look like "TG:value": the first two
+// characters select the attribute, everything from the 4th character on is
+// its value. Tags other than ID/CL/DS/PN/PP/VN are kept as custom tags.
+//
+// A line too short to contain any field yields a default ProgramInfo, and
+// fields too short to hold a tag plus separator are skipped (instead of
+// raising std::out_of_range from substr).
+ProgramInfo ProgramInfo::FromSam(const string& sam)
+{
+    // length of the leading '@PG\t'
+    const size_t prefixLength = 4;
+    if (sam.size() <= prefixLength)
+        return ProgramInfo();
+
+    // pop off the prefix, then split rest of line into tokens
+    const vector<string> tokens = internal::Split(sam.substr(prefixLength), '\t');
+    if (tokens.empty())
+        return ProgramInfo();
+
+    ProgramInfo prog;
+    map<string, string> custom;
+
+    // iterate over tokens
+    for (const string& token : tokens) {
+
+        // need at least 2 tag characters and the separator
+        if (token.size() < 3)
+            continue;
+
+        const string tokenTag = token.substr(0,2);
+        const string tokenValue = token.substr(3);
+
+        // set program contents
+        if (tokenTag == internal::token_ID) prog.Id(tokenValue);
+        else if (tokenTag == internal::token_CL) prog.CommandLine(tokenValue);
+        else if (tokenTag == internal::token_DS) prog.Description(tokenValue);
+        else if (tokenTag == internal::token_PN) prog.Name(tokenValue);
+        else if (tokenTag == internal::token_PP) prog.PreviousProgramId(tokenValue);
+        else if (tokenTag == internal::token_VN) prog.Version(tokenValue);
+
+        // otherwise, "custom" tag
+        else
+            custom[tokenTag] = tokenValue;
+    }
+
+    prog.CustomTags(custom);
+    return prog;
+}
+
+// Formats this entry as one "@PG" header line.
+//
+// The ID field is always written. Name, version, description, previous
+// program ID and command line follow in that order, each only when non-empty.
+// Custom tags are appended last, in the map's iteration order.
+string ProgramInfo::ToSam(void) const
+{
+    stringstream line;
+    line << "@PG";
+    line << internal::MakeSamTag(internal::token_ID, id_);
+
+    // optional standard fields
+    if (!name_.empty())
+        line << internal::MakeSamTag(internal::token_PN, name_);
+    if (!version_.empty())
+        line << internal::MakeSamTag(internal::token_VN, version_);
+    if (!description_.empty())
+        line << internal::MakeSamTag(internal::token_DS, description_);
+    if (!previousProgramId_.empty())
+        line << internal::MakeSamTag(internal::token_PP, previousProgramId_);
+    if (!commandLine_.empty())
+        line << internal::MakeSamTag(internal::token_CL, commandLine_);
+
+    // append any custom tags
+    for (const auto& tagAndValue : custom_)
+        line << internal::MakeSamTag(tagAndValue.first, tagAndValue.second);
+
+    return line.str();
+}
+
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// Author: Derek Barnett
+
+#ifndef PULSE2BASECACHE_H
+#define PULSE2BASECACHE_H
+
+#include "pbbam/Config.h"
+#include <boost/dynamic_bitset.hpp>
+#include <string>
+#include <cassert>
+#include <cctype>
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief Per-record bitmask recording which pulses were basecalled and which
+///        were squashed.
+///
+/// One bit is kept per pulse (set == basecalled). The mask is computed once
+/// from the pulse-call string and can then be applied to any per-pulse
+/// container of the same length via RemoveSquashedPulses().
+///
+class Pulse2BaseCache
+{
+public:
+    /// \brief Creates a Pulse2BaseCache from pulseCall data ('pc' tag)
+    ///
+    /// Computes & stores cache of basecalled vs. squashed pulse positions for
+    /// later masking of pulse data. A pulse is treated as basecalled when its
+    /// character is uppercase according to std::isupper; every other
+    /// character marks a squashed pulse.
+    ///
+    /// \param[in] pulseCalls   string contents of 'pc' tag
+    ///
+    Pulse2BaseCache(const std::string& pulseCalls)
+        : data_(pulseCalls.size())
+    {
+        // basecalled pulse -> data[i] == 1
+        // squashed pulse   -> data[i] == 0
+        //
+        // std::isupper() is only defined for values representable as
+        // unsigned char (or EOF), so a plain (possibly negative) char must be
+        // converted before the call.
+        //
+        const auto numPulses = pulseCalls.size();
+        for (size_t i = 0; i < numPulses; ++i) {
+            const auto pulse = static_cast<unsigned char>(pulseCalls[i]);
+            data_[i] = (std::isupper(pulse) != 0);
+        }
+    }
+
+    Pulse2BaseCache(void) = delete;
+    Pulse2BaseCache(const Pulse2BaseCache& other) = default;
+    Pulse2BaseCache(Pulse2BaseCache&& other) = default;
+    Pulse2BaseCache& operator=(const Pulse2BaseCache&) = default;
+    Pulse2BaseCache& operator=(Pulse2BaseCache&&) = default;
+    ~Pulse2BaseCache(void) noexcept {}
+
+public:
+
+    /// \brief Locates the first basecalled pulse.
+    ///
+    /// \returns the result of boost::dynamic_bitset<>::find_first() on the
+    ///          cached mask, i.e. the lowest set position, or
+    ///          boost::dynamic_bitset<>::npos if no pulse was basecalled
+    ///
+    size_t FindFirst(void) const
+    { return data_.find_first(); }
+
+    /// \brief Locates the next basecalled pulse after a given position.
+    ///
+    /// \param[in] from     position to search after (not included)
+    /// \returns the result of boost::dynamic_bitset<>::find_next(from), i.e.
+    ///          the lowest set position greater than \p from, or npos if none
+    ///
+    size_t FindNext(size_t from) const
+    { return data_.find_next(from); }
+
+    /// \brief Queries a single pulse.
+    ///
+    /// \param[in] pos      pulse index; not range-checked here, so it must be
+    ///                     less than NumPulses()
+    /// \returns true if the pulse at \p pos was basecalled
+    ///
+    bool IsBasecallAt(const size_t pos) const
+    { return data_[pos]; }
+
+    /// \returns the total number of pulses (basecalled & squashed)
+    ///
+    size_t NumPulses(void) const
+    {
+        return data_.size();
+    }
+
+    /// \returns the total number of basecalled pulses
+    ///
+    size_t NumBases(void) const
+    {
+        return data_.count();
+    }
+
+    /// \brief Removes squashed pulse positions from input data.
+    ///
+    /// \tparam T   container providing size(), reserve(), push_back() and at()
+    ///
+    /// \param[in] pulseData    contents of any per-pulse tag; must hold exactly
+    ///                         NumPulses() elements (checked by assert only)
+    /// \returns Input \p pulseData less all squashed pulses
+    ///
+    template<typename T>
+    T RemoveSquashedPulses(const T& pulseData) const
+    {
+        const auto numPulses = pulseData.size();
+        assert(numPulses == data_.size());
+
+        // The reserve() below overshoots the required space, but numPulses is cheap
+        // to compute, and by definition will be sufficient to hold the result. Thus
+        // we only ever need to do one allocation.
+        //
+        T result;
+        result.reserve(numPulses);
+
+        // Only include data at positions that match our cached pulse data.
+        //
+        for (size_t i = 0; i < numPulses; ++i) {
+            if (data_[i])
+                result.push_back(pulseData.at(i));
+        }
+        return result;
+    }
+
+private:
+    boost::dynamic_bitset<> data_;  // bit i set <=> pulse i was basecalled
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // PULSE2BASECACHE_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file QNameQuery.cpp
+/// \brief Implements the QNameQuery class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/QNameQuery.h"
+#include "pbbam/CompositeBamReader.h"
+#include <boost/optional.hpp>
+#include <cassert>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// Private implementation of QNameQuery.
+//
+// Reads the dataset's records sequentially and hands them out in runs of
+// consecutive records that share the same FullName(). The end of a run is only
+// noticed after reading the first record of the following run, so that record
+// is parked in nextRecord_ until the next call.
+//
+struct QNameQuery::QNameQueryPrivate
+{
+public:
+    QNameQueryPrivate(const DataSet& dataset)
+        : reader_(new SequentialCompositeBamReader(dataset))
+        , nextRecord_(boost::none)
+    { }
+
+    // Replaces the contents of 'records' with the next run of same-named
+    // records. Returns false only when no record could be delivered.
+    bool GetNext(vector<BamRecord>& records)
+    {
+        records.clear();
+        string currentName;
+
+        // Start the run with the record left over from the previous call.
+        if (nextRecord_.is_initialized()) {
+            BamRecord pending = nextRecord_.get();
+            currentName = pending.FullName();
+            records.push_back(std::move(pending));
+            nextRecord_ = boost::none;
+        }
+
+        BamRecord incoming;
+        while (reader_->GetNext(incoming)) {
+            if (records.empty()) {
+                // first record of this run defines the name to match
+                currentName = incoming.FullName();
+            }
+            else if (incoming.FullName() != currentName) {
+                // run is over - keep the newcomer for the next call
+                nextRecord_ = incoming;
+                return true;
+            }
+            records.push_back(incoming);
+        }
+
+        // reader exhausted: report whatever was collected (possibly nothing)
+        return !records.empty();
+    }
+
+public:
+    unique_ptr<SequentialCompositeBamReader> reader_;
+    boost::optional<BamRecord> nextRecord_;    // look-ahead record, if any
+};
+
+// Creates the query over 'dataset'; all working state is held by the
+// private implementation object.
+QNameQuery::QNameQuery(const DataSet& dataset)
+    : internal::IGroupQuery(),
+      d_(new QNameQueryPrivate(dataset))
+{
+}
+
+// Defined out-of-line, in the translation unit where QNameQueryPrivate is a complete type.
+QNameQuery::~QNameQuery(void)
+{
+}
+
+// Forwards to the private implementation, which fills 'records' with the
+// next group of same-named records.
+bool QNameQuery::GetNext(vector<BamRecord>& records)
+{
+    const bool gotGroup = d_->GetNext(records);
+    return gotGroup;
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file QualityValue.cpp
+/// \brief Implements the QualityValue class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/QualityValue.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Out-of-line definition of the static constant QualityValue::MAX.
+// NOTE(review): 93 is presumably the largest Phred score that stays printable
+// under the FASTQ ASCII+33 encoding - confirm against QualityValue.h.
+const uint8_t QualityValue::MAX = 93;
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ReadAccuracyQuery.cpp
+/// \brief Implements the ReadAccuracyQuery class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/ReadAccuracyQuery.h"
+#include "pbbam/PbiFilterTypes.h"
+#include "pbbam/CompositeBamReader.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// Private implementation of ReadAccuracyQuery: a composite reader restricted
+// by a PbiReadAccuracyFilter built from the requested accuracy and comparison.
+struct ReadAccuracyQuery::ReadAccuracyQueryPrivate
+{
+    // Compare::None => records are delivered unsorted
+    PbiFilterCompositeBamReader<Compare::None> reader_;
+
+    ReadAccuracyQueryPrivate(const Accuracy accuracy,
+                             const Compare::Type compareType,
+                             const DataSet& dataset)
+        : reader_(PbiReadAccuracyFilter(accuracy, compareType), dataset)
+    {
+    }
+};
+
+// Creates a query yielding the records of 'dataset' whose read accuracy
+// satisfies 'compareType' relative to 'accuracy'.
+ReadAccuracyQuery::ReadAccuracyQuery(const Accuracy accuracy,
+                                     const Compare::Type compareType,
+                                     const DataSet& dataset)
+    : internal::IQuery(),
+      d_(new ReadAccuracyQueryPrivate(accuracy, compareType, dataset))
+{
+}
+
+// Defined out-of-line, in the translation unit where ReadAccuracyQueryPrivate is a complete type.
+ReadAccuracyQuery::~ReadAccuracyQuery(void)
+{
+}
+
+// Fetches the next record that passes the accuracy filter into 'r';
+// the return value is that of the underlying filtered reader.
+bool ReadAccuracyQuery::GetNext(BamRecord &r)
+{
+    auto& filteredReader = d_->reader_;
+    return filteredReader.GetNext(r);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ReadGroupInfo.cpp
+/// \brief Implements the ReadGroupInfo class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/ReadGroupInfo.h"
+#include "pbbam/MD5.h"
+#include "ChemistryTable.h"
+#include "SequenceUtils.h"
+#include <iomanip>
+#include <set>
+#include <sstream>
+#include <stdexcept>
+#include <cstdio>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// ---- two-letter tags of an @RG header line ----
+static const string sam_ID{ "ID" };
+static const string sam_CN{ "CN" };
+static const string sam_DS{ "DS" };
+static const string sam_DT{ "DT" };
+static const string sam_FO{ "FO" };
+static const string sam_KS{ "KS" };
+static const string sam_LB{ "LB" };
+static const string sam_PG{ "PG" };
+static const string sam_PI{ "PI" };
+static const string sam_PL{ "PL" };
+static const string sam_PM{ "PM" };
+static const string sam_PU{ "PU" };
+static const string sam_SM{ "SM" };
+
+// ---- base feature names, as spelled inside the description (DS) field ----
+static const string feature_DQ{ "DeletionQV" };
+static const string feature_DT{ "DeletionTag" };
+static const string feature_IQ{ "InsertionQV" };
+static const string feature_MQ{ "MergeQV" };
+static const string feature_SQ{ "SubstitutionQV" };
+static const string feature_ST{ "SubstitutionTag" };
+static const string feature_IP{ "Ipd" };
+static const string feature_PW{ "PulseWidth" };
+static const string feature_PM{ "PkMid" };
+static const string feature_PA{ "PkMean" };
+static const string feature_PI{ "PkMid2" };
+static const string feature_PS{ "PkMean2" };
+static const string feature_LT{ "Label" };
+static const string feature_PQ{ "LabelQV" };
+static const string feature_PT{ "AltLabel" };
+static const string feature_PV{ "AltLabelQV" };
+static const string feature_PG{ "PulseMergeQV" };
+static const string feature_PC{ "PulseCall" };
+static const string feature_PD{ "PrePulseFrames" };
+static const string feature_PX{ "PulseCallWidth" };
+static const string feature_SF{ "StartFrame" };
+
+// ---- fixed keys of the description (DS) field ----
+static const string token_RT{ "READTYPE" };
+static const string token_BK{ "BINDINGKIT" };
+static const string token_SK{ "SEQUENCINGKIT" };
+static const string token_BV{ "BASECALLERVERSION" };
+static const string token_FR{ "FRAMERATEHZ" };
+static const string token_CT{ "CONTROL" };
+
+// ---- barcode keys of the description (DS) field ----
+static const string token_BF{ "BarcodeFile" };
+static const string token_BH{ "BarcodeHash" };
+static const string token_BC{ "BarcodeCount" };
+static const string token_BM{ "BarcodeMode" };
+static const string token_BQ{ "BarcodeQuality" };
+
+// ---- frame codec names ----
+static const string codec_RAW{ "Frames" };
+static const string codec_V1{ "CodecV1" };
+
+// ---- barcode mode names ----
+static const string barcodemode_NONE{ "None" };
+static const string barcodemode_SYM{ "Symmetric" };
+static const string barcodemode_ASYM{ "Asymmetric" };
+
+// ---- barcode quality names ----
+static const string barcodequal_NONE{ "None" };
+static const string barcodequal_SCORE{ "Score" };
+static const string barcodequal_PROB{ "Probability" };
+
+// ---- platform model names ----
+static const string platformModelType_ASTRO{ "ASTRO" };
+static const string platformModelType_RS{ "RS" };
+static const string platformModelType_SEQUEL{ "SEQUEL" };
+
+// Maps a BaseFeature to the name used for it in the description field.
+// Throws std::runtime_error for a value not handled below.
+static
+string BaseFeatureName(const BaseFeature& feature)
+{
+    const string* name = nullptr;
+    switch (feature) {
+        case BaseFeature::DELETION_QV      : name = &feature_DQ; break;
+        case BaseFeature::DELETION_TAG     : name = &feature_DT; break;
+        case BaseFeature::INSERTION_QV     : name = &feature_IQ; break;
+        case BaseFeature::MERGE_QV         : name = &feature_MQ; break;
+        case BaseFeature::SUBSTITUTION_QV  : name = &feature_SQ; break;
+        case BaseFeature::SUBSTITUTION_TAG : name = &feature_ST; break;
+        case BaseFeature::IPD              : name = &feature_IP; break;
+        case BaseFeature::PULSE_WIDTH      : name = &feature_PW; break;
+        case BaseFeature::PKMID            : name = &feature_PM; break;
+        case BaseFeature::PKMEAN           : name = &feature_PA; break;
+        case BaseFeature::PKMID2           : name = &feature_PI; break;
+        case BaseFeature::PKMEAN2          : name = &feature_PS; break;
+        case BaseFeature::LABEL_QV         : name = &feature_PQ; break;
+        case BaseFeature::ALT_LABEL        : name = &feature_PT; break;
+        case BaseFeature::ALT_LABEL_QV     : name = &feature_PV; break;
+        case BaseFeature::PULSE_MERGE_QV   : name = &feature_PG; break;
+        case BaseFeature::PULSE_CALL       : name = &feature_PC; break;
+        case BaseFeature::PRE_PULSE_FRAMES : name = &feature_PD; break;
+        case BaseFeature::PULSE_CALL_WIDTH : name = &feature_PX; break;
+        case BaseFeature::START_FRAME      : name = &feature_SF; break;
+        default:
+            break;
+    }
+
+    if (name == nullptr)
+        throw std::runtime_error{ "unrecognized base feature" };
+    return *name;
+}
+
+// Maps a FrameCodec to the name used for it in the description field.
+// Throws std::runtime_error for a value not handled below.
+static
+string FrameCodecName(const FrameCodec& codec)
+{
+    const string* name = nullptr;
+    switch (codec) {
+        case FrameCodec::RAW : name = &codec_RAW; break;
+        case FrameCodec::V1  : name = &codec_V1;  break;
+        default:
+            break;
+    }
+
+    if (name == nullptr)
+        throw std::runtime_error{ "unrecognized frame codec" };
+    return *name;
+}
+
+// Maps a BarcodeModeType to the name used for it in the description field.
+// Throws std::runtime_error for a value not handled below.
+static
+string BarcodeModeName(const BarcodeModeType& mode)
+{
+    const string* name = nullptr;
+    switch (mode) {
+        case BarcodeModeType::NONE       : name = &barcodemode_NONE; break;
+        case BarcodeModeType::SYMMETRIC  : name = &barcodemode_SYM;  break;
+        case BarcodeModeType::ASYMMETRIC : name = &barcodemode_ASYM; break;
+        default:
+            break;
+    }
+
+    if (name == nullptr)
+        throw std::runtime_error{ "unrecognized barcode mode" };
+    return *name;
+}
+
+// Maps a BarcodeQualityType to the name used for it in the description field.
+// Throws std::runtime_error for a value not handled below.
+static
+string BarcodeQualityName(const BarcodeQualityType& type)
+{
+    const string* name = nullptr;
+    switch (type) {
+        case BarcodeQualityType::NONE        : name = &barcodequal_NONE;  break;
+        case BarcodeQualityType::SCORE       : name = &barcodequal_SCORE; break;
+        case BarcodeQualityType::PROBABILITY : name = &barcodequal_PROB;  break;
+        default:
+            break;
+    }
+
+    if (name == nullptr)
+        throw std::runtime_error{ "unrecognized barcode quality type" };
+    return *name;
+}
+
+// Maps a PlatformModelType to its textual name.
+// Throws std::runtime_error for a value not handled below.
+static
+string PlatformModelName(const PlatformModelType& type)
+{
+    const string* name = nullptr;
+    switch (type) {
+        case PlatformModelType::ASTRO  : name = &platformModelType_ASTRO;  break;
+        case PlatformModelType::RS     : name = &platformModelType_RS;     break;
+        case PlatformModelType::SEQUEL : name = &platformModelType_SEQUEL; break;
+        default:
+            break;
+    }
+
+    if (name == nullptr)
+        throw std::runtime_error{ "unrecognized platform model" };
+    return *name;
+}
+
+// Reverse lookup tables (textual name -> enum value).
+// They start out empty and are filled on first use by the matching
+// InitNameTo...() helper below.
+// NOTE(review): that first-use fill is not guarded by any lock in this file -
+// confirm callers never race on the first lookup.
+static map<string, BaseFeature> nameToFeature;
+static map<string, FrameCodec> nameToCodec;
+static map<string, BarcodeModeType> nameToBarcodeMode;
+static map<string, BarcodeQualityType> nameToBarcodeQuality;
+static map<string, PlatformModelType> nameToPlatformModel;
+
+// Fills nameToFeature on first use; does nothing once the table has entries.
+static inline
+void InitNameToFeature(void)
+{
+    if (!nameToFeature.empty())
+        return;
+
+    nameToFeature.emplace(feature_DQ, BaseFeature::DELETION_QV);
+    nameToFeature.emplace(feature_DT, BaseFeature::DELETION_TAG);
+    nameToFeature.emplace(feature_IQ, BaseFeature::INSERTION_QV);
+    nameToFeature.emplace(feature_MQ, BaseFeature::MERGE_QV);
+    nameToFeature.emplace(feature_SQ, BaseFeature::SUBSTITUTION_QV);
+    nameToFeature.emplace(feature_ST, BaseFeature::SUBSTITUTION_TAG);
+    nameToFeature.emplace(feature_IP, BaseFeature::IPD);
+    nameToFeature.emplace(feature_PW, BaseFeature::PULSE_WIDTH);
+    nameToFeature.emplace(feature_PM, BaseFeature::PKMID);
+    nameToFeature.emplace(feature_PA, BaseFeature::PKMEAN);
+    nameToFeature.emplace(feature_PI, BaseFeature::PKMID2);
+    nameToFeature.emplace(feature_PS, BaseFeature::PKMEAN2);
+    nameToFeature.emplace(feature_PQ, BaseFeature::LABEL_QV);
+    nameToFeature.emplace(feature_PT, BaseFeature::ALT_LABEL);
+    nameToFeature.emplace(feature_PV, BaseFeature::ALT_LABEL_QV);
+    nameToFeature.emplace(feature_PC, BaseFeature::PULSE_CALL);
+    nameToFeature.emplace(feature_PG, BaseFeature::PULSE_MERGE_QV);
+    nameToFeature.emplace(feature_PD, BaseFeature::PRE_PULSE_FRAMES);
+    nameToFeature.emplace(feature_PX, BaseFeature::PULSE_CALL_WIDTH);
+    nameToFeature.emplace(feature_SF, BaseFeature::START_FRAME);
+}
+
+// Fills nameToCodec on first use; does nothing once the table has entries.
+static inline
+void InitNameToCodec(void)
+{
+    if (!nameToCodec.empty())
+        return;
+
+    nameToCodec.emplace(codec_RAW, FrameCodec::RAW);
+    nameToCodec.emplace(codec_V1,  FrameCodec::V1);
+}
+
+// Fills nameToBarcodeMode on first use; does nothing once the table has entries.
+static inline
+void InitNameToBarcodeMode(void)
+{
+    if (!nameToBarcodeMode.empty())
+        return;
+
+    nameToBarcodeMode.emplace(barcodemode_NONE, BarcodeModeType::NONE);
+    nameToBarcodeMode.emplace(barcodemode_SYM,  BarcodeModeType::SYMMETRIC);
+    nameToBarcodeMode.emplace(barcodemode_ASYM, BarcodeModeType::ASYMMETRIC);
+}
+
+// Fills nameToBarcodeQuality on first use; does nothing once the table has entries.
+static inline
+void InitNameToBarcodeQuality(void)
+{
+    if (!nameToBarcodeQuality.empty())
+        return;
+
+    nameToBarcodeQuality.emplace(barcodequal_NONE,  BarcodeQualityType::NONE);
+    nameToBarcodeQuality.emplace(barcodequal_SCORE, BarcodeQualityType::SCORE);
+    nameToBarcodeQuality.emplace(barcodequal_PROB,  BarcodeQualityType::PROBABILITY);
+}
+
+// Fills nameToPlatformModel on first use; does nothing once the table has entries.
+static inline
+void InitNameToPlatformModel(void)
+{
+    if (!nameToPlatformModel.empty())
+        return;
+
+    nameToPlatformModel.emplace(platformModelType_ASTRO,  PlatformModelType::ASTRO);
+    nameToPlatformModel.emplace(platformModelType_RS,     PlatformModelType::RS);
+    nameToPlatformModel.emplace(platformModelType_SEQUEL, PlatformModelType::SEQUEL);
+}
+
+// True when 'name' begins with the text "Barcode".
+static inline
+bool IsLikelyBarcodeKey(const string& name)
+{
+    static const string prefix{ "Barcode" };
+    return name.compare(0, prefix.size(), prefix) == 0;
+}
+
+// True when 'name' is one of the base feature names known to nameToFeature.
+static inline
+bool IsBaseFeature(const string& name)
+{
+    InitNameToFeature();
+    const bool known = (nameToFeature.count(name) != 0);
+    return known;
+}
+
+// Converts a base feature name into its BaseFeature value.
+//
+// Throws std::out_of_range (the same type map::at() would raise) when 'name'
+// is unknown; the message names the offending value so a malformed header
+// can be diagnosed.
+static inline
+BaseFeature BaseFeatureFromName(const string& name)
+{
+    InitNameToFeature();
+    const auto found = nameToFeature.find(name);
+    if (found == nameToFeature.cend())
+        throw std::out_of_range{ "unrecognized base feature name: " + name };
+    return found->second;
+}
+
+// Converts a frame codec name into its FrameCodec value.
+//
+// Throws std::out_of_range (the same type map::at() would raise) when 'name'
+// is unknown; the message names the offending value so a malformed header
+// can be diagnosed.
+static inline
+FrameCodec FrameCodecFromName(const string& name)
+{
+    InitNameToCodec();
+    const auto found = nameToCodec.find(name);
+    if (found == nameToCodec.cend())
+        throw std::out_of_range{ "unrecognized frame codec name: " + name };
+    return found->second;
+}
+
+// Converts a barcode mode name into its BarcodeModeType value.
+//
+// Throws std::out_of_range (the same type map::at() would raise) when 'name'
+// is unknown; the message names the offending value so a malformed header
+// can be diagnosed.
+static inline
+BarcodeModeType BarcodeModeFromName(const string& name)
+{
+    InitNameToBarcodeMode();
+    const auto found = nameToBarcodeMode.find(name);
+    if (found == nameToBarcodeMode.cend())
+        throw std::out_of_range{ "unrecognized barcode mode name: " + name };
+    return found->second;
+}
+
+// Converts a barcode quality name into its BarcodeQualityType value.
+//
+// Throws std::out_of_range (the same type map::at() would raise) when 'name'
+// is unknown; the message names the offending value so a malformed header
+// can be diagnosed.
+static inline
+BarcodeQualityType BarcodeQualityFromName(const string& name)
+{
+    InitNameToBarcodeQuality();
+    const auto found = nameToBarcodeQuality.find(name);
+    if (found == nameToBarcodeQuality.cend())
+        throw std::out_of_range{ "unrecognized barcode quality name: " + name };
+    return found->second;
+}
+
+// Converts a platform model name into its PlatformModelType value.
+//
+// Throws std::out_of_range (the same type map::at() would raise) when 'name'
+// is unknown; the message names the offending value so a malformed header
+// can be diagnosed.
+static inline
+PlatformModelType PlatformModelFromName(const string& name)
+{
+    InitNameToPlatformModel();
+    const auto found = nameToPlatformModel.find(name);
+    if (found == nameToPlatformModel.cend())
+        throw std::out_of_range{ "unrecognized platform model name: " + name };
+    return found->second;
+}
+
+} // namespace internal
+
+ReadGroupInfo::ReadGroupInfo(void)
+ : platformModel_(PlatformModelType::SEQUEL)
+ , readType_("UNKNOWN")
+ , ipdCodec_(FrameCodec::V1)
+ , pulseWidthCodec_(FrameCodec::V1)
+{ }
+
+// Constructs a read group with the given ID. Platform defaults to SEQUEL,
+// the read type to "UNKNOWN" and both frame codecs to V1.
+ReadGroupInfo::ReadGroupInfo(const std::string& id)
+    : id_(id),
+      platformModel_(PlatformModelType::SEQUEL),
+      readType_("UNKNOWN"),
+      ipdCodec_(FrameCodec::V1),
+      pulseWidthCodec_(FrameCodec::V1)
+{
+}
+
+// Constructs a read group for a movie / read type pair on the default
+// (SEQUEL) platform by delegating to the three-argument constructor.
+ReadGroupInfo::ReadGroupInfo(const std::string& movieName,
+                             const std::string& readType)
+    : ReadGroupInfo(movieName, readType, PlatformModelType::SEQUEL)
+{
+}
+
+// Constructs a read group for a movie / read type pair on an explicit
+// platform. The ID is derived via MakeReadGroupId(movieName, readType);
+// both frame codecs default to V1.
+ReadGroupInfo::ReadGroupInfo(const std::string& movieName,
+                             const std::string& readType,
+                             const PlatformModelType platform)
+    : id_(MakeReadGroupId(movieName, readType)),
+      movieName_(movieName),
+      platformModel_(platform),
+      readType_(readType),
+      ipdCodec_(FrameCodec::V1),
+      pulseWidthCodec_(FrameCodec::V1)
+{
+}
+
+// Copy constructor: copies each listed member from 'other'.
+// NOTE(review): the member list is maintained by hand - confirm against
+// ReadGroupInfo.h that it covers every data member (e.g. any storage for
+// custom tags); a member missing here would not be carried over on copy.
+ReadGroupInfo::ReadGroupInfo(const ReadGroupInfo& other)
+ : id_(other.id_)
+ , sequencingCenter_(other.sequencingCenter_)
+ , date_(other.date_)
+ , flowOrder_(other.flowOrder_)
+ , keySequence_(other.keySequence_)
+ , library_(other.library_)
+ , programs_(other.programs_)
+ , predictedInsertSize_(other.predictedInsertSize_)
+ , movieName_(other.movieName_)
+ , sample_(other.sample_)
+ , platformModel_(other.platformModel_)
+ , readType_(other.readType_)
+ , bindingKit_(other.bindingKit_)
+ , sequencingKit_(other.sequencingKit_)
+ , basecallerVersion_(other.basecallerVersion_)
+ , frameRateHz_(other.frameRateHz_)
+ , control_(other.control_)
+ , ipdCodec_(other.ipdCodec_)
+ , pulseWidthCodec_(other.pulseWidthCodec_)
+ , hasBarcodeData_(other.hasBarcodeData_)
+ , barcodeFile_(other.barcodeFile_)
+ , barcodeHash_(other.barcodeHash_)
+ , barcodeCount_(other.barcodeCount_)
+ , barcodeMode_(other.barcodeMode_)
+ , barcodeQuality_(other.barcodeQuality_)
+ , features_(other.features_)
+{ }
+
+// Move constructor: moves each listed member out of 'other'.
+// NOTE(review): the member list is maintained by hand - confirm against
+// ReadGroupInfo.h that it covers every data member. The definition is not
+// marked noexcept; whether that can be added depends on the declaration in
+// the header.
+ReadGroupInfo::ReadGroupInfo(ReadGroupInfo&& other)
+ : id_(std::move(other.id_))
+ , sequencingCenter_(std::move(other.sequencingCenter_))
+ , date_(std::move(other.date_))
+ , flowOrder_(std::move(other.flowOrder_))
+ , keySequence_(std::move(other.keySequence_))
+ , library_(std::move(other.library_))
+ , programs_(std::move(other.programs_))
+ , predictedInsertSize_(std::move(other.predictedInsertSize_))
+ , movieName_(std::move(other.movieName_))
+ , sample_(std::move(other.sample_))
+ , platformModel_(std::move(other.platformModel_))
+ , readType_(std::move(other.readType_))
+ , bindingKit_(std::move(other.bindingKit_))
+ , sequencingKit_(std::move(other.sequencingKit_))
+ , basecallerVersion_(std::move(other.basecallerVersion_))
+ , frameRateHz_(std::move(other.frameRateHz_))
+ , control_(std::move(other.control_))
+ , ipdCodec_(std::move(other.ipdCodec_))
+ , pulseWidthCodec_(std::move(other.pulseWidthCodec_))
+ , hasBarcodeData_(std::move(other.hasBarcodeData_))
+ , barcodeFile_(std::move(other.barcodeFile_))
+ , barcodeHash_(std::move(other.barcodeHash_))
+ , barcodeCount_(std::move(other.barcodeCount_))
+ , barcodeMode_(std::move(other.barcodeMode_))
+ , barcodeQuality_(std::move(other.barcodeQuality_))
+ , features_(std::move(other.features_))
+{ }
+
+// Nothing to release explicitly here; members are destroyed automatically.
+ReadGroupInfo::~ReadGroupInfo(void)
+{
+}
+
+// Copy assignment: assigns each listed member from 'other' and returns *this.
+// There is no self-assignment check; each statement is a plain member-to-member
+// assignment.
+// NOTE(review): the member list is maintained by hand - confirm against
+// ReadGroupInfo.h that it covers every data member.
+ReadGroupInfo& ReadGroupInfo::operator=(const ReadGroupInfo& other)
+{
+ id_ = other.id_;
+ sequencingCenter_ = other.sequencingCenter_;
+ date_ = other.date_;
+ flowOrder_ = other.flowOrder_;
+ keySequence_ = other.keySequence_;
+ library_ = other.library_;
+ programs_ = other.programs_;
+ platformModel_ = other.platformModel_;
+ predictedInsertSize_ = other.predictedInsertSize_;
+ movieName_ = other.movieName_;
+ sample_ = other.sample_;
+ readType_ = other.readType_;
+ bindingKit_ = other.bindingKit_;
+ sequencingKit_ = other.sequencingKit_;
+ basecallerVersion_ = other.basecallerVersion_;
+ frameRateHz_ = other.frameRateHz_;
+ control_ = other.control_;
+ ipdCodec_ = other.ipdCodec_;
+ pulseWidthCodec_ = other.pulseWidthCodec_;
+ hasBarcodeData_ = other.hasBarcodeData_;
+ barcodeFile_ = other.barcodeFile_;
+ barcodeHash_ = other.barcodeHash_;
+ barcodeCount_ = other.barcodeCount_;
+ barcodeMode_ = other.barcodeMode_;
+ barcodeQuality_ = other.barcodeQuality_;
+ features_ = other.features_;
+ return *this;
+}
+
+// Move assignment: move-assigns each listed member from 'other' and returns
+// *this. There is no self-move check.
+// NOTE(review): the member list is maintained by hand - confirm against
+// ReadGroupInfo.h that it covers every data member. The definition is not
+// marked noexcept; whether that can be added depends on the declaration in
+// the header.
+ReadGroupInfo& ReadGroupInfo::operator=(ReadGroupInfo&& other)
+{
+ id_ = std::move(other.id_);
+ sequencingCenter_ = std::move(other.sequencingCenter_);
+ date_ = std::move(other.date_);
+ flowOrder_ = std::move(other.flowOrder_);
+ keySequence_ = std::move(other.keySequence_);
+ library_ = std::move(other.library_);
+ programs_ = std::move(other.programs_);
+ platformModel_ = std::move(other.platformModel_);
+ predictedInsertSize_ = std::move(other.predictedInsertSize_);
+ movieName_ = std::move(other.movieName_);
+ sample_ = std::move(other.sample_);
+ readType_ = std::move(other.readType_);
+ bindingKit_ = std::move(other.bindingKit_);
+ sequencingKit_ = std::move(other.sequencingKit_);
+ basecallerVersion_ = std::move(other.basecallerVersion_);
+ frameRateHz_ = std::move(other.frameRateHz_);
+ control_ = std::move(other.control_);
+ ipdCodec_ = std::move(other.ipdCodec_);
+ pulseWidthCodec_ = std::move(other.pulseWidthCodec_);
+ hasBarcodeData_ = std::move(other.hasBarcodeData_);
+ barcodeFile_ = std::move(other.barcodeFile_);
+ barcodeHash_ = std::move(other.barcodeHash_);
+ barcodeCount_ = std::move(other.barcodeCount_);
+ barcodeMode_ = std::move(other.barcodeMode_);
+ barcodeQuality_ = std::move(other.barcodeQuality_);
+ features_ = std::move(other.features_);
+ return *this;
+}
+
+// Parses a description string into this object.
+//
+// The description is split on ';' into fields, each field on its first '='
+// into key and value. Fields without '=' and fields whose key is not
+// recognized are ignored. Recognized keys are, in order of precedence:
+//   - the fixed tokens (READTYPE, BINDINGKIT, ...)
+//   - plain base feature names                 -> features_[feature] = value
+//   - keys starting with "Barcode"             -> barcode members
+//   - "<Ipd|PulseWidth>:<codec name>"          -> codec + feature value
+//
+// hasBarcodeData_ is recomputed on every call that gets past the empty check:
+// it is set only if all five barcode keys were present. Exceptions raised by
+// std::stoul or by the name lookup helpers for malformed values propagate to
+// the caller.
+//
+void ReadGroupInfo::DecodeSamDescription(const std::string& description)
+{
+    const auto fields = internal::Split(description, ';');
+    if (fields.empty())
+        return;
+
+    bool sawBarcodeFile    = false;
+    bool sawBarcodeHash    = false;
+    bool sawBarcodeCount   = false;
+    bool sawBarcodeMode    = false;
+    bool sawBarcodeQuality = false;
+
+    for (const auto& field : fields) {
+
+        // only KEY=VALUE fields are of interest
+        const auto eqPos = field.find('=');
+        if (eqPos == string::npos)
+            continue;
+
+        const auto key   = field.substr(0, eqPos);
+        const auto value = field.substr(eqPos + 1);
+
+        // 'mandatory' items
+        if (key == internal::token_RT) { readType_ = value;               continue; }
+        if (key == internal::token_BK) { bindingKit_ = value;             continue; }
+        if (key == internal::token_BV) { basecallerVersion_ = value;      continue; }
+        if (key == internal::token_SK) { sequencingKit_ = value;          continue; }
+        if (key == internal::token_FR) { frameRateHz_ = value;            continue; }
+        if (key == internal::token_CT) { control_ = (value == "TRUE");    continue; }
+
+        // base features
+        if (internal::IsBaseFeature(key)) {
+            features_[internal::BaseFeatureFromName(key)] = value;
+            continue;
+        }
+
+        // barcode data (unknown "Barcode..." keys are dropped)
+        if (internal::IsLikelyBarcodeKey(key)) {
+            if (key == internal::token_BF) {
+                barcodeFile_ = value;
+                sawBarcodeFile = true;
+            }
+            else if (key == internal::token_BH) {
+                barcodeHash_ = value;
+                sawBarcodeHash = true;
+            }
+            else if (key == internal::token_BC) {
+                barcodeCount_ = static_cast<size_t>(std::stoul(value));
+                sawBarcodeCount = true;
+            }
+            else if (key == internal::token_BM) {
+                barcodeMode_ = internal::BarcodeModeFromName(value);
+                sawBarcodeMode = true;
+            }
+            else if (key == internal::token_BQ) {
+                barcodeQuality_ = internal::BarcodeQualityFromName(value);
+                sawBarcodeQuality = true;
+            }
+            continue;
+        }
+
+        // frame codecs: key looks like "<feature>:<codec>"
+        const auto keyParts = internal::Split(key, ':');
+        if (keyParts.size() != 2)
+            continue;
+
+        const auto& featureName = keyParts.at(0);
+        if (featureName == internal::feature_IP) {
+            ipdCodec_ = internal::FrameCodecFromName(keyParts.at(1));
+            features_[BaseFeature::IPD] = value;
+        }
+        else if (featureName == internal::feature_PW) {
+            pulseWidthCodec_ = internal::FrameCodecFromName(keyParts.at(1));
+            features_[BaseFeature::PULSE_WIDTH] = value;
+        }
+    }
+
+    // barcode data counts as present only when the full set was supplied
+    hasBarcodeData_ = sawBarcodeFile  && sawBarcodeHash &&
+                      sawBarcodeCount && sawBarcodeMode &&
+                      sawBarcodeQuality;
+}
+
+// Builds the ';'-separated KEY=VALUE description string for this read group.
+//
+// Layout: READTYPE first (always written), then every base feature that has a
+// non-empty value (Ipd / PulseWidth carry a ":<codec>" suffix on the key),
+// then the non-empty kit / basecaller / frame rate fields, CONTROL=TRUE when
+// control_ is set, and finally the five barcode fields when hasBarcodeData_
+// is set.
+//
+std::string ReadGroupInfo::EncodeSamDescription(void) const
+{
+    static const string sep{ ";" };
+    static const string eq{ "=" };
+
+    string description;
+    description.reserve(256);
+    description += internal::token_RT;
+    description += eq;
+    description += readType_;
+
+    // appends ";key=value" to the output
+    const auto appendField = [&description](const string& key, const string& value) {
+        description += sep;
+        description += key;
+        description += eq;
+        description += value;
+    };
+
+    for (const auto& feature : features_) {
+        auto key = internal::BaseFeatureName(feature.first);
+        if (key.empty() || feature.second.empty())
+            continue;
+
+        // frame-based features advertise their codec as part of the key
+        if (key == internal::feature_IP)
+            key += ":" + internal::FrameCodecName(ipdCodec_);
+        else if (key == internal::feature_PW)
+            key += ":" + internal::FrameCodecName(pulseWidthCodec_);
+
+        appendField(key, feature.second);
+    }
+
+    if (!bindingKit_.empty())
+        appendField(internal::token_BK, bindingKit_);
+    if (!sequencingKit_.empty())
+        appendField(internal::token_SK, sequencingKit_);
+    if (!basecallerVersion_.empty())
+        appendField(internal::token_BV, basecallerVersion_);
+    if (!frameRateHz_.empty())
+        appendField(internal::token_FR, frameRateHz_);
+
+    // the field is only ever written for control reads, hence always "TRUE"
+    if (control_)
+        appendField(internal::token_CT, "TRUE");
+
+    if (hasBarcodeData_) {
+        appendField(internal::token_BF, barcodeFile_);
+        appendField(internal::token_BH, barcodeHash_);
+        appendField(internal::token_BC, std::to_string(barcodeCount_));
+        appendField(internal::token_BM, internal::BarcodeModeName(barcodeMode_));
+        appendField(internal::token_BQ, internal::BarcodeQualityName(barcodeQuality_));
+    }
+
+    return description;
+}
+
+/// \brief Creates a ReadGroupInfo from one "@RG" SAM header line.
+///
+/// The first four characters (the "@RG\t" prefix) are dropped and the rest is
+/// split on tabs into "TT:value" tokens. Recognized tags fill the matching
+/// read group fields; any other tag is collected as a custom tag.
+///
+/// Lines too short to hold anything after the prefix yield a default
+/// ReadGroupInfo, and tokens too short to hold a "TT:" prefix (e.g. the empty
+/// token produced by two consecutive tabs) are skipped instead of raising
+/// std::out_of_range from substr().
+///
+/// \param[in] sam  SAM-formatted @RG header line
+/// \returns the populated read group
+ReadGroupInfo ReadGroupInfo::FromSam(const string& sam)
+{
+    // nothing to parse unless there is text after '@RG\t'
+    if (sam.size() <= 4)
+        return ReadGroupInfo{ };
+
+    // pop off '@RG\t', then split rest of line into tokens
+    const auto tokens = internal::Split(sam.substr(4), '\t');
+    if (tokens.empty())
+        return ReadGroupInfo{ };
+
+    auto rg = ReadGroupInfo{ };
+    auto custom = map<string, string>{ };
+
+    for (const auto& token : tokens) {
+
+        // a usable token is at least "TT:" - shorter ones would make
+        // substr(3) throw, so skip them
+        if (token.size() < 3)
+            continue;
+
+        const auto tokenTag   = token.substr(0,2);
+        const auto tokenValue = token.substr(3);
+
+        // set read group info
+        if      (tokenTag == internal::sam_ID) rg.Id(tokenValue);
+        else if (tokenTag == internal::sam_CN) rg.SequencingCenter(tokenValue);
+        else if (tokenTag == internal::sam_DT) rg.Date(tokenValue);
+        else if (tokenTag == internal::sam_FO) rg.FlowOrder(tokenValue);
+        else if (tokenTag == internal::sam_KS) rg.KeySequence(tokenValue);
+        else if (tokenTag == internal::sam_LB) rg.Library(tokenValue);
+        else if (tokenTag == internal::sam_PG) rg.Programs(tokenValue);
+        else if (tokenTag == internal::sam_PI) rg.PredictedInsertSize(tokenValue);
+        else if (tokenTag == internal::sam_PU) rg.MovieName(tokenValue);
+        else if (tokenTag == internal::sam_SM) rg.Sample(tokenValue);
+        else if (tokenTag == internal::sam_DS) rg.DecodeSamDescription(tokenValue);
+        else if (tokenTag == internal::sam_PM) rg.PlatformModel(internal::PlatformModelFromName(tokenValue));
+
+        // otherwise, "custom" tag
+        else
+            custom[tokenTag] = tokenValue;
+    }
+    rg.CustomTags(custom);
+
+    return rg;
+}
+
+/// \brief Formats a numeric read group ID as hexadecimal text, zero-padded
+///        on the left to a width of 8 characters.
+///
+/// \param[in] id  numeric read group ID
+/// \returns the hexadecimal string form of \p id
+string ReadGroupInfo::IntToId(const int32_t id)
+{
+    stringstream formatter;
+    formatter << std::hex
+              << std::setw(8)
+              << std::setfill('0')
+              << id;
+    return formatter.str();
+}
+
+/// \brief Sets the codec used for IPD frames and registers the IPD base
+///        feature tag.
+///
+/// \param[in] codec  frame codec to store
+/// \param[in] tag    tag name for the IPD feature; "ip" is used when empty
+/// \returns reference to this read group
+ReadGroupInfo& ReadGroupInfo::IpdCodec(const FrameCodec& codec,
+                                       const string& tag)
+{
+    ipdCodec_ = codec;
+
+    // keep the base features map in sync, defaulting the tag name if needed
+    BaseFeatureTag(BaseFeature::IPD, tag.empty() ? string{"ip"} : tag);
+    return *this;
+}
+
+/// \brief Sets the codec used for pulse width frames and registers the
+///        pulse width base feature tag.
+///
+/// \param[in] codec  frame codec to store
+/// \param[in] tag    tag name for the pulse width feature; "pw" is used
+///                   when empty
+/// \returns reference to this read group
+ReadGroupInfo& ReadGroupInfo::PulseWidthCodec(const FrameCodec& codec,
+                                              const string& tag)
+{
+    pulseWidthCodec_ = codec;
+
+    // keep the base features map in sync, defaulting the tag name if needed
+    BaseFeatureTag(BaseFeature::PULSE_WIDTH, tag.empty() ? string{"pw"} : tag);
+    return *this;
+}
+
+/// \brief Looks up the sequencing chemistry name for a
+///        (binding kit, sequencing kit, basecaller version) triple.
+///
+/// Only the first two dot-separated fields of the basecaller version take
+/// part in the lookup against internal::ChemistryTable.
+///
+/// \param[in] bindingKit         binding kit identifier
+/// \param[in] sequencingKit      sequencing kit identifier
+/// \param[in] basecallerVersion  basecaller version, at least "major.minor"
+/// \returns the chemistry name of the first matching table row
+/// \throws std::runtime_error if the version has fewer than two fields
+/// \throws InvalidSequencingChemistryException if no table row matches
+string ReadGroupInfo::SequencingChemistryFromTriple(const string& bindingKit,
+                                                    const string& sequencingKit,
+                                                    const string& basecallerVersion)
+{
+    const auto versionParts = internal::Split(basecallerVersion, '.');
+    if (versionParts.size() < 2)
+        throw std::runtime_error("basecaller version too short: " + basecallerVersion);
+
+    // "major.minor"
+    string majorMinor = versionParts.at(0);
+    majorMinor += ".";
+    majorMinor += versionParts.at(1);
+
+    for (const auto& row : internal::ChemistryTable) {
+        if (!(bindingKit == row[0]))
+            continue;
+        if (!(sequencingKit == row[1]))
+            continue;
+        if (majorMinor == row[2])
+            return row[3];
+    }
+
+    // not found
+    throw InvalidSequencingChemistryException(bindingKit,
+                                              sequencingKit,
+                                              basecallerVersion);
+}
+
+/// \brief Renders this read group as an "@RG" SAM header line.
+///
+/// ID and platform are always written, followed by the encoded description
+/// (when non-empty), the optional text fields that are set, the platform
+/// model, and finally all custom tags.
+///
+/// \returns the SAM-formatted line (no trailing newline is added here)
+std::string ReadGroupInfo::ToSam(void) const
+{
+    stringstream samLine;
+
+    // writes "\tTAG:value" only for fields that hold a value
+    const auto writeIfSet = [&samLine](const string& tag, const string& value)
+    {
+        if (!value.empty())
+            samLine << internal::MakeSamTag(tag, value);
+    };
+
+    samLine << "@RG"
+            << internal::MakeSamTag(internal::sam_ID, id_)
+            << internal::MakeSamTag(internal::sam_PL, Platform());
+
+    writeIfSet(internal::sam_DS, EncodeSamDescription());
+    writeIfSet(internal::sam_CN, sequencingCenter_);
+    writeIfSet(internal::sam_DT, date_);
+    writeIfSet(internal::sam_FO, flowOrder_);
+    writeIfSet(internal::sam_KS, keySequence_);
+    writeIfSet(internal::sam_LB, library_);
+    writeIfSet(internal::sam_PG, programs_);
+    writeIfSet(internal::sam_PI, predictedInsertSize_);
+    writeIfSet(internal::sam_PU, movieName_);
+    writeIfSet(internal::sam_SM, sample_);
+
+    // platform model is written unconditionally
+    samLine << internal::MakeSamTag(internal::sam_PM, internal::PlatformModelName(platformModel_));
+
+    // append any custom tags
+    for (const auto& customTag : custom_)
+        samLine << internal::MakeSamTag(customTag.first, customTag.second);
+
+    return samLine.str();
+}
+
+/// \brief Derives a read group ID from a movie name and read type.
+///
+/// The ID is the first 8 characters of MD5Hash("<movieName>//<readType>").
+///
+/// \param[in] movieName  sequencing movie name
+/// \param[in] readType   read type name
+/// \returns the 8-character read group ID
+std::string MakeReadGroupId(const std::string& movieName,
+                            const std::string& readType)
+{
+    const std::string hashInput = movieName + "//" + readType;
+    return MD5Hash(hashInput).substr(0,8);
+}
+
+/// \brief Member-wise equality: two read groups are equal when all SAM
+///        fields, description fields, codecs, barcode data, base features
+///        and custom tags match.
+///
+/// \param[in] other  read group to compare against
+/// \returns true if every compared member is equal
+bool ReadGroupInfo::operator==(const ReadGroupInfo& other) const
+{
+    // plain @RG fields
+    const bool sameSamFields =
+           id_                  == other.id_
+        && sequencingCenter_    == other.sequencingCenter_
+        && date_                == other.date_
+        && flowOrder_           == other.flowOrder_
+        && keySequence_         == other.keySequence_
+        && library_             == other.library_
+        && programs_            == other.programs_
+        && platformModel_       == other.platformModel_
+        && predictedInsertSize_ == other.predictedInsertSize_
+        && movieName_           == other.movieName_
+        && sample_              == other.sample_;
+    if (!sameSamFields)
+        return false;
+
+    // fields carried in the description
+    const bool sameDescription =
+           readType_          == other.readType_
+        && bindingKit_        == other.bindingKit_
+        && sequencingKit_     == other.sequencingKit_
+        && basecallerVersion_ == other.basecallerVersion_
+        && frameRateHz_       == other.frameRateHz_
+        && control_           == other.control_
+        && ipdCodec_          == other.ipdCodec_
+        && pulseWidthCodec_   == other.pulseWidthCodec_;
+    if (!sameDescription)
+        return false;
+
+    // barcode data
+    const bool sameBarcodes =
+           hasBarcodeData_ == other.hasBarcodeData_
+        && barcodeFile_    == other.barcodeFile_
+        && barcodeHash_    == other.barcodeHash_
+        && barcodeCount_   == other.barcodeCount_
+        && barcodeMode_    == other.barcodeMode_
+        && barcodeQuality_ == other.barcodeQuality_;
+    if (!sameBarcodes)
+        return false;
+
+    // sizes are compared before std::equal so the second range is never
+    // read past its end
+    if (!(features_.size() == other.features_.size()))
+        return false;
+    if (!std::equal(features_.cbegin(), features_.cend(), other.features_.cbegin()))
+        return false;
+
+    if (!(custom_.size() == other.custom_.size()))
+        return false;
+    return std::equal(custom_.cbegin(), custom_.cend(), other.custom_.cbegin());
+}
+
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file SamTagCodec.cpp
+/// \brief Implements the SamTagCodec class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/SamTagCodec.h"
+#include "AssertUtils.h"
+#include <boost/lexical_cast.hpp>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+/// \brief Appends the text form of a single tag value to \p result.
+///
+/// \param[in]     value         value to append
+/// \param[in,out] result        string being built
+/// \param[in]     force8BitInt  when true, the value is converted to int
+///                              before being turned into text (used by the
+///                              callers in this file for 8-bit integers)
+template<typename T>
+inline void appendSamValue(const T& value,
+                           string& result,
+                           bool force8BitInt = false)
+{
+    const string text = force8BitInt
+        ? boost::lexical_cast<string>(static_cast<int>(value))
+        : boost::lexical_cast<string>(value);
+    result += text;
+}
+
+/// \brief Appends every element of \p container to \p result, each one
+///        preceded by a comma.
+///
+/// \param[in]     container     sequence of values to append
+/// \param[in,out] result        string being built
+/// \param[in]     force8BitInt  when true, each element is converted to int
+///                              before being turned into text
+template<typename T>
+void appendSamMultiValue(const T& container,
+                         string& result,
+                         bool force8BitInt = false)
+{
+    for (auto element = container.cbegin(), last = container.cend();
+         element != last; ++element)
+    {
+        result.push_back(',');
+        result += force8BitInt
+            ? boost::lexical_cast<string>(static_cast<int>(*element))
+            : boost::lexical_cast<string>(*element);
+    }
+}
+
+/// \brief Splits \p s on \p delim and appends the pieces to \p elems.
+///
+/// Pieces are read with getline(), so an empty input adds nothing and a
+/// trailing delimiter does not add a final empty piece.
+///
+/// \returns reference to \p elems
+static
+vector<string>& split(const string& s, char delim, vector<string>& elems)
+{
+    stringstream stream(s);
+    for (string piece; getline(stream, piece, delim); )
+        elems.push_back(piece);
+    return elems;
+}
+
+/// \brief Splits \p s on \p delim and returns the pieces as a new vector.
+static
+vector<string> split(const string& s, char delim) {
+    vector<string> pieces;
+    split(s, delim, pieces);
+    return pieces;
+}
+
+/// \brief Parses the element list of a SAM float array.
+///
+/// \param[in] data  text in which every value is preceded by one separator
+///                  character (e.g. ",1.5,2.5")
+/// \returns the parsed values, in order
+vector<float> readFloatSamMultiValue(const string& data)
+{
+    vector<float> values;
+
+    // strtof reports where it stopped through a non-const char*
+    char* cursor = const_cast<char*>(data.c_str());
+    const char* const stop = cursor + data.length();
+
+    // cursor always rests on a separator; the number starts one past it
+    while (cursor + 1 < stop)
+        values.push_back(strtof(cursor + 1, &cursor));
+
+    return values;
+}
+
+/// \brief Parses the element list of a SAM signed-integer array.
+///
+/// Values are read with strtol() using base 0 and then converted to T.
+///
+/// \param[in] data  text in which every value is preceded by one separator
+///                  character (e.g. ",1,-2,3")
+/// \returns the parsed values, in order
+template<typename T>
+vector<T> readSignedSamMultiValue(const string& data)
+{
+    vector<T> values;
+
+    // strtol reports where it stopped through a non-const char*
+    char* cursor = const_cast<char*>(data.c_str());
+    const char* const stop = cursor + data.length();
+
+    // cursor always rests on a separator; the number starts one past it
+    while (cursor + 1 < stop)
+        values.push_back(static_cast<T>(strtol(cursor + 1, &cursor, 0)));
+
+    return values;
+}
+
+/// \brief Parses the element list of a SAM unsigned-integer array.
+///
+/// Values are read with strtoul() using base 0 and then converted to T.
+///
+/// \param[in] data  text in which every value is preceded by one separator
+///                  character (e.g. ",1,2,3")
+/// \returns the parsed values, in order
+template<typename T>
+vector<T> readUnsignedSamMultiValue(const string& data)
+{
+    vector<T> values;
+
+    // strtoul reports where it stopped through a non-const char*
+    char* cursor = const_cast<char*>(data.c_str());
+    const char* const stop = cursor + data.length();
+
+    // cursor always rests on a separator; the number starts one past it
+    while (cursor + 1 < stop)
+        values.push_back(static_cast<T>(strtoul(cursor + 1, &cursor, 0)));
+
+    return values;
+}
+
+/// \brief Parses SAM-formatted optional fields into a TagCollection.
+///
+/// The input is split on tabs; each token is read as "TT:t:VALUE" (two
+/// character tag name, one character type code, value). Tokens shorter than
+/// six characters are skipped.
+///
+/// Type handling:
+///  - 'A'/'a'      : single character, stored with the ASCII_CHAR modifier
+///  - c,C,s,S,i,I  : integer, stored in the smallest signed (if negative) or
+///                   unsigned type that holds it
+///  - 'f'          : float
+///  - 'Z'          : string
+///  - 'H'          : string stored with the HEX_STRING modifier
+///  - 'B'          : typed array; the first value character selects the
+///                   element type
+///
+/// Unsupported type codes are handed to PB_ASSERT_OR_CONTINUE. Numeric
+/// conversions use boost::lexical_cast, whose failures are not caught here.
+///
+/// \param[in] tagString  tab-separated SAM tag text
+/// \returns the decoded tags
+TagCollection SamTagCodec::Decode(const string& tagString)
+{
+    TagCollection tags;
+
+    const vector<string>& tokens = split(tagString, '\t');
+    const auto end = tokens.cend();
+    for (auto iter = tokens.cbegin(); iter != end; ++iter ) {
+        const string& token = (*iter);
+        if (token.size() < 6)          // TT:t:X
+            continue;
+
+        const string& name = token.substr(0, 2);
+        const char type = token.at(3);
+        const string& remainder = token.substr(5);
+        PB_ASSERT_OR_CONTINUE(!remainder.empty());
+
+        switch (type) {
+
+            // technically only 'A' is allowed in SAM chars,
+            // but we'll be a little permissive
+            case 'A' :
+            case 'a' :
+            {
+                // the modifier is the Tag constructor's second argument; it
+                // must stay outside the static_cast, otherwise the comma
+                // operator discards the character and casts the modifier
+                tags[name] = Tag(static_cast<char>(remainder.at(0)), TagModifier::ASCII_CHAR);
+                break;
+            }
+
+            // technically only 'i' is allowed in SAM ints, but we'll be a little
+            // permissive since SAM might be a bit more "user-edited" than BAM
+            case 'c' :
+            case 'C' :
+            case 's' :
+            case 'S' :
+            case 'i' :
+            case 'I' :
+            {
+                // check out boost::numeric cast for these conversions
+
+                // negative value (force signed int)
+                if (remainder.at(0) == '-') {
+                    const int32_t x = boost::lexical_cast<int32_t>(remainder);
+                    if ( x >= INT8_MIN )
+                        tags[name] = static_cast<int8_t>(x);
+                    else if ( x >= INT16_MIN )
+                        tags[name] = static_cast<int16_t>(x);
+                    else
+                        tags[name] = x;
+                }
+
+                // unsigned int
+                else {
+                    const uint32_t x = boost::lexical_cast<uint32_t>(remainder);
+                    if ( x <= UINT8_MAX )
+                        tags[name] = static_cast<uint8_t>(x);
+                    else if ( x <= UINT16_MAX )
+                        tags[name] = static_cast<uint16_t>(x);
+                    else
+                        tags[name] = x;
+                }
+                break;
+            }
+
+            case 'f' :
+            {
+                tags[name] = boost::lexical_cast<float>(remainder);
+                break;
+            }
+
+            case 'Z' :
+            {
+                tags[name] = remainder;
+                break;
+            }
+
+            case 'H' :
+            {
+                tags[name] = Tag(remainder, TagModifier::HEX_STRING);
+                break;
+            }
+
+            case 'B' :
+            {
+                // first character names the element type, the rest is the
+                // comma-prefixed element list
+                const char elementType = remainder.at(0);
+                const string& arrayData = remainder.substr(1);
+                switch (elementType) {
+                    case 'c' : tags[name] = readSignedSamMultiValue<int8_t>(arrayData);     break;
+                    case 'C' : tags[name] = readUnsignedSamMultiValue<uint8_t>(arrayData);  break;
+                    case 's' : tags[name] = readSignedSamMultiValue<int16_t>(arrayData);    break;
+                    case 'S' : tags[name] = readUnsignedSamMultiValue<uint16_t>(arrayData); break;
+                    case 'i' : tags[name] = readSignedSamMultiValue<int32_t>(arrayData);    break;
+                    case 'I' : tags[name] = readUnsignedSamMultiValue<uint32_t>(arrayData); break;
+                    case 'f' : tags[name] = readFloatSamMultiValue(arrayData);              break;
+                    default:
+                        PB_ASSERT_OR_CONTINUE(false);
+                }
+                break;
+            }
+
+            // unsupported SAM tag type
+            default :
+                PB_ASSERT_OR_CONTINUE(false);
+        }
+    }
+
+    return tags;
+}
+
+/// \brief Renders a TagCollection as tab-separated SAM optional fields
+///        ("TT:t:VALUE").
+///
+/// Null tags are skipped. A tag carrying the ASCII_CHAR modifier with a
+/// non-NUL character is written as type 'A'; all integer widths are written
+/// as type 'i'; strings are written as 'Z', or 'H' with the HEX_STRING
+/// modifier; arrays are written as 'B' plus their element type code.
+///
+/// \param[in] tags  tags to encode
+/// \returns the SAM text; an unsupported tag type goes through
+///          PB_ASSERT_OR_RETURN_VALUE with an empty string as the value
+string SamTagCodec::Encode(const TagCollection& tags)
+{
+    string samText;
+    samText.reserve(1024);
+
+    for (auto entry = tags.cbegin(), last = tags.cend(); entry != last; ++entry) {
+        const string& tagName = entry->first;
+        const Tag& tagValue = entry->second;
+        PB_ASSERT_OR_CONTINUE(tagName.size() == 2);
+        if (tagValue.IsNull())
+            continue;
+
+        // tab separator between fields
+        if (!samText.empty())
+            samText.push_back('\t');
+
+        // "<TAG>:"
+        samText.append(tagName);
+        samText.push_back(':');
+
+        // "<TYPE>:<DATA>" for printable, ASCII char
+        if (tagValue.HasModifier(TagModifier::ASCII_CHAR)) {
+            const char ascii = tagValue.ToAscii();
+            if (ascii != '\0') {
+                samText.append("A:");
+                samText.push_back(ascii);
+                continue;
+            }
+        }
+
+        // "<TYPE>:<DATA>" for all other data
+        switch (tagValue.Type()) {
+
+            // scalars - 8-bit values are widened so they print as numbers
+            case TagDataType::INT8 :
+                samText.append("i:");
+                appendSamValue(tagValue.ToInt8(), samText, true);
+                break;
+            case TagDataType::UINT8 :
+                samText.append("i:");
+                appendSamValue(tagValue.ToUInt8(), samText, true);
+                break;
+            case TagDataType::INT16 :
+                samText.append("i:");
+                appendSamValue(tagValue.ToInt16(), samText);
+                break;
+            case TagDataType::UINT16 :
+                samText.append("i:");
+                appendSamValue(tagValue.ToUInt16(), samText);
+                break;
+            case TagDataType::INT32 :
+                samText.append("i:");
+                appendSamValue(tagValue.ToInt32(), samText);
+                break;
+            case TagDataType::UINT32 :
+                samText.append("i:");
+                appendSamValue(tagValue.ToUInt32(), samText);
+                break;
+            case TagDataType::FLOAT :
+                samText.append("f:");
+                appendSamValue(tagValue.ToFloat(), samText);
+                break;
+
+            // strings
+            case TagDataType::STRING :
+                samText.append(tagValue.HasModifier(TagModifier::HEX_STRING) ? "H:" : "Z:");
+                samText.append(tagValue.ToString());
+                break;
+
+            // arrays
+            case TagDataType::INT8_ARRAY :
+                samText.append("B:c");
+                appendSamMultiValue(tagValue.ToInt8Array(), samText, true);
+                break;
+            case TagDataType::UINT8_ARRAY :
+                samText.append("B:C");
+                appendSamMultiValue(tagValue.ToUInt8Array(), samText, true);
+                break;
+            case TagDataType::INT16_ARRAY :
+                samText.append("B:s");
+                appendSamMultiValue(tagValue.ToInt16Array(), samText);
+                break;
+            case TagDataType::UINT16_ARRAY :
+                samText.append("B:S");
+                appendSamMultiValue(tagValue.ToUInt16Array(), samText);
+                break;
+            case TagDataType::INT32_ARRAY :
+                samText.append("B:i");
+                appendSamMultiValue(tagValue.ToInt32Array(), samText);
+                break;
+            case TagDataType::UINT32_ARRAY :
+                samText.append("B:I");
+                appendSamMultiValue(tagValue.ToUInt32Array(), samText);
+                break;
+            case TagDataType::FLOAT_ARRAY :
+                samText.append("B:f");
+                appendSamMultiValue(tagValue.ToFloatArray(), samText);
+                break;
+
+            default :
+                PB_ASSERT_OR_RETURN_VALUE(false, string());
+        }
+    }
+
+    return samText;
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "pbbam/SamWriter.h"
+#include "pbbam/Validator.h"
+#include "FileProducer.h"
+#include "MemoryUtils.h"
+#include <htslib/hfile.h>
+#include <htslib/sam.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief Implementation behind SamWriter: owns the htslib SAM file handle
+///        and the raw header used when writing records.
+///
+/// Output goes to the filename reported by FileProducer::TempFilename().
+class SamWriterPrivate : public internal::FileProducer
+{
+public:
+    /// \brief Opens the output for writing and writes the SAM header.
+    ///
+    /// \param[in] filename   requested output filename
+    /// \param[in] rawHeader  htslib header; must not be null
+    /// \throws std::runtime_error if the header is null, the file cannot be
+    ///         opened, or the header cannot be written
+    SamWriterPrivate(const std::string& filename,
+                     const PBBAM_SHARED_PTR<bam_hdr_t> rawHeader)
+        : internal::FileProducer(filename)
+        , file_(nullptr)
+        , header_(rawHeader)
+    {
+        if (!header_)
+            throw std::runtime_error("null header");
+
+        // open file ("w" selects SAM text output)
+        file_.reset(sam_open(TempFilename().c_str(), "w"));
+        if (!file_)
+            throw std::runtime_error("could not open file for writing");
+
+        // write header
+        if (sam_hdr_write(file_.get(), header_.get()) != 0)
+            throw std::runtime_error("could not write header");
+    }
+
+    void TryFlush(void);
+    void Write(const BamRecord& record);
+
+private:
+    std::unique_ptr<samFile, internal::HtslibFileDeleter> file_;   // open SAM output
+    PBBAM_SHARED_PTR<bam_hdr_t> header_;                           // header passed to htslib on every write
+};
+
+/// \brief Flushes buffered output to the underlying file.
+///
+/// \throws std::runtime_error if hflush() reports a failure
+void SamWriterPrivate::TryFlush(void)
+{
+    // hflush() returns 0 on success. The call itself is required: comparing
+    // the hFILE pointer against 0 flushes nothing and is true for every
+    // open file.
+    const auto ret = hflush(file_.get()->fp.hfile);
+    if (ret != 0)
+        throw std::runtime_error("could not flush output buffer contents");
+}
+
+/// \brief Writes one record to the SAM file.
+///
+/// The record's bin number is recomputed from its position and end position
+/// before it is handed to htslib.
+///
+/// \param[in] record  record to write
+/// \throws std::runtime_error if sam_write1() returns a value <= 0
+void SamWriterPrivate::Write(const BamRecord& record)
+{
+#if PBBAM_AUTOVALIDATE
+    Validator::Validate(record);
+#endif
+
+    const auto rawRecord = internal::BamRecordMemory::GetRawData(record);
+    auto* const raw = rawRecord.get();
+
+    // store bin number
+    // min_shift=14 & n_lvls=5 are SAM/BAM "magic numbers"
+    raw->core.bin = hts_reg2bin(raw->core.pos, bam_endpos(raw), 14, 5);
+
+    // write record to file
+    if (sam_write1(file_.get(), header_.get(), raw) <= 0)
+        throw std::runtime_error("could not write record");
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+/// \brief Creates a SAM writer for \p filename and writes \p header to it.
+///
+/// \param[in] filename  output filename
+/// \param[in] header    header converted to its raw htslib form for the
+///                      implementation object
+SamWriter::SamWriter(const string& filename, const BamHeader& header)
+    : IRecordWriter()
+    , d_(nullptr)
+{
+#if PBBAM_AUTOVALIDATE
+    Validator::Validate(header);
+#endif
+    const auto rawHeader = internal::BamHeaderMemory::MakeRawHeader(header);
+    d_.reset(new internal::SamWriterPrivate{ filename, rawHeader });
+}
+
+/// \brief Destroys the writer; the implementation object held in d_ is
+///        released along with it.
+SamWriter::~SamWriter()
+{
+}
+
+/// \brief Forwards the flush request to the implementation object.
+void SamWriter::TryFlush()
+{
+    internal::SamWriterPrivate& impl = *d_;
+    impl.TryFlush();
+}
+
+/// \brief Writes \p record through the implementation object.
+///
+/// \param[in] record  record to write
+void SamWriter::Write(const BamRecord& record)
+{
+    internal::SamWriterPrivate& impl = *d_;
+    impl.Write(record);
+}
+
+/// \brief Wraps \p recordImpl in a BamRecord and writes it through the
+///        implementation object.
+///
+/// \param[in] recordImpl  low-level record to write
+void SamWriter::Write(const BamRecordImpl& recordImpl)
+{
+    const BamRecord record{recordImpl};
+    d_->Write(record);
+}
--- /dev/null
+
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file SequenceInfo.cpp
+/// \brief Implements the SequenceInfo class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/SequenceInfo.h"
+#include "SequenceUtils.h"
+#include <sstream>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+// two-letter tag names of the fields read from and written to @SQ header
+// lines; file-local and never modified, hence const
+static const string token_SN = string("SN");
+static const string token_LN = string("LN");
+static const string token_AS = string("AS");
+static const string token_M5 = string("M5");
+static const string token_SP = string("SP");
+static const string token_UR = string("UR");
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+/// \brief Creates a SequenceInfo with every member default-initialized.
+SequenceInfo::SequenceInfo()
+{
+}
+
+/// \brief Creates a SequenceInfo with the given name and length; all other
+///        members are default-initialized.
+///
+/// \param[in] name    sequence name
+/// \param[in] length  sequence length, as text
+SequenceInfo::SequenceInfo(const std::string& name,
+                           const std::string& length)
+    : name_{name}
+    , length_{length}
+{
+}
+
+/// \brief Copy constructor.
+///
+/// Defaulted so that every data member is copied, including the custom tag
+/// map (custom_), which a hand-written member list can miss.
+SequenceInfo::SequenceInfo(const SequenceInfo& other) = default;
+
+/// \brief Move constructor.
+///
+/// Defaulted so that every data member is moved, including the custom tag
+/// map (custom_), which a hand-written member list can miss.
+SequenceInfo::SequenceInfo(SequenceInfo&& other) = default;
+
+/// \brief Destructor; performs no work beyond destroying the members.
+SequenceInfo::~SequenceInfo()
+{
+}
+
+/// \brief Copy assignment.
+///
+/// Defaulted so that every data member is assigned, including the custom
+/// tag map (custom_), which a hand-written member list can miss.
+///
+/// \returns reference to this object
+SequenceInfo& SequenceInfo::operator=(const SequenceInfo& other) = default;
+
+/// \brief Move assignment.
+///
+/// Defaulted so that every data member is moved, including the custom tag
+/// map (custom_), which a hand-written member list can miss.
+///
+/// \returns reference to this object
+SequenceInfo& SequenceInfo::operator=(SequenceInfo&& other) = default;
+
+/// \brief Creates a SequenceInfo from one "@SQ" SAM header line.
+///
+/// The first four characters (the "@SQ\t" prefix) are dropped and the rest
+/// is split on tabs into "TT:value" tokens. Recognized tags fill the
+/// matching fields; any other tag is collected as a custom tag.
+///
+/// Lines too short to hold anything after the prefix yield a default
+/// SequenceInfo, and tokens too short to hold a "TT:" prefix (e.g. the empty
+/// token produced by two consecutive tabs) are skipped instead of raising
+/// std::out_of_range from substr().
+///
+/// \param[in] sam  SAM-formatted @SQ header line
+/// \returns the populated sequence info
+SequenceInfo SequenceInfo::FromSam(const std::string& sam)
+{
+    // nothing to parse unless there is text after '@SQ\t'
+    if (sam.size() <= 4)
+        return SequenceInfo();
+
+    // pop off '@SQ\t', then split rest of line into tokens
+    const vector<string>& tokens = internal::Split(sam.substr(4), '\t');
+    if (tokens.empty())
+        return SequenceInfo();
+
+    SequenceInfo seq;
+    map<string, string> custom;
+
+    // iterate over tokens
+    for (const string& token : tokens) {
+
+        // a usable token is at least "TT:" - shorter ones would make
+        // substr(3) throw, so skip them
+        if (token.size() < 3)
+            continue;
+
+        const string& tokenTag   = token.substr(0,2);
+        const string& tokenValue = token.substr(3);
+
+        // set sequence info
+        if      (tokenTag == internal::token_SN) seq.Name(tokenValue);
+        else if (tokenTag == internal::token_LN) seq.Length(tokenValue);
+        else if (tokenTag == internal::token_AS) seq.AssemblyId(tokenValue);
+        else if (tokenTag == internal::token_M5) seq.Checksum(tokenValue);
+        else if (tokenTag == internal::token_SP) seq.Species(tokenValue);
+        else if (tokenTag == internal::token_UR) seq.Uri(tokenValue);
+
+        // otherwise, "custom" tag
+        else
+            custom[tokenTag] = tokenValue;
+    }
+
+    seq.CustomTags(custom);
+    return seq;
+}
+
+/// \brief Checks that the sequence has a name and a length within
+///        [0, INT32_MAX].
+///
+/// As before, length text that does not start with a number parses as 0 and
+/// is therefore accepted.
+///
+/// \returns true if the name is non-empty and the length is in range
+bool SequenceInfo::IsValid(void) const
+{
+    if (name_.empty())
+        return false;
+
+    // parse into long long rather than long: long may be only 32 bits wide,
+    // which would make the upper-bound check below always true. strtoll also
+    // has defined behavior for values that do not fit, which atol lacks.
+    const long long l = strtoll(length_.c_str(), nullptr, 10);
+    return l >= 0 && l <= INT32_MAX;
+}
+
+/// \brief Renders this sequence as an "@SQ" SAM header line.
+///
+/// The name is always written; length, assembly ID, checksum, species and
+/// URI are written only when non-empty, followed by all custom tags.
+///
+/// \returns the SAM-formatted line (no trailing newline is added here)
+std::string SequenceInfo::ToSam(void) const
+{
+    stringstream samLine;
+
+    // writes "\tTAG:value" only for fields that hold a value
+    const auto writeIfSet = [&samLine](const string& tag, const string& value)
+    {
+        if (!value.empty())
+            samLine << internal::MakeSamTag(tag, value);
+    };
+
+    samLine << "@SQ"
+            << internal::MakeSamTag(internal::token_SN, name_);
+
+    writeIfSet(internal::token_LN, length_);
+    writeIfSet(internal::token_AS, assemblyId_);
+    writeIfSet(internal::token_M5, checksum_);
+    writeIfSet(internal::token_SP, species_);
+    writeIfSet(internal::token_UR, uri_);
+
+    // append any custom tags
+    for (const auto& customTag : custom_)
+        samLine << internal::MakeSamTag(customTag.first, customTag.second);
+
+    return samLine.str();
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef SEQUENCEUTILS_H
+#define SEQUENCEUTILS_H
+
+#include "StringUtils.h"
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <ctype.h>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief Returns the complement of a nucleotide code.
+///
+/// The lookup is indexed by the low five bits of the upper-cased character,
+/// so upper- and lower-case letters give the same (upper-case) result.
+/// '-' and '*' are returned unchanged; characters without a table entry
+/// yield '\0'.
+///
+/// \param[in] character  nucleotide code
+/// \returns the complement character
+inline char Complement(const char character)
+{
+    // 32 entries - one for every possible value of (c & 0x1f) - so that no
+    // masked index (e.g. 26 for 'Z') can read past the end of the table
+    static char const complementLookup[32] =
+    {
+        '\0', 'T', 'V', 'G', 'H',
+        '\0', '\0', 'C', 'D', '\0',
+        '\0', 'M', '\0', 'K', 'N',
+        '\0', '\0', '\0', 'Y', 'S',
+        'A', 'A', 'B', 'W', '\0', 'R',
+        '\0', '\0', '\0', '\0', '\0', '\0'
+    };
+    if (character == '-' || character == '*')
+        return character;
+
+    // toupper() requires a value representable as unsigned char
+    return complementLookup[toupper(static_cast<unsigned char>(character)) & 0x1f];
+}
+
+//inline void Reverse(std::string& s)
+//{ std::reverse(s.begin(), s.end()); }
+
+/// \brief Reverses the elements of \p input in place.
+///
+/// \param[in,out] input  container providing begin()/end()
+template<typename T>
+void Reverse(T& input)
+{
+    const auto first = input.begin();
+    const auto last  = input.end();
+    std::reverse(first, last);
+}
+
+/// \brief Reverses \p input in place when \p reverse is true, then returns it.
+///
+/// Called with an lvalue, T is a reference type and the same object is
+/// returned by reference. Called with an rvalue, the result is returned by
+/// value; forwarding lets it be moved into the return value instead of
+/// copied.
+///
+/// \param[in,out] input    container providing begin()/end()
+/// \param[in]     reverse  whether to reverse
+/// \returns \p input, reversed if requested
+template<typename T>
+T MaybeReverse(T&& input, bool reverse)
+{
+    if (reverse) std::reverse(input.begin(), input.end());
+    return std::forward<T>(input);
+}
+
+/// \brief Returns a reversed copy of \p input; the argument is not modified.
+///
+/// \param[in] input  container providing begin()/end()
+/// \returns copy of \p input with its elements in reverse order
+template<typename T>
+T Reversed(const T& input)
+{
+    T copy = input;
+    std::reverse(copy.begin(), copy.end());
+    return copy;
+}
+
+//inline std::string Reversed(const std::string& input)
+//{
+// std::string result = input;
+// Reverse(result);
+// return result;
+//}
+
+/// \brief Replaces \p seq with its reverse complement, in place.
+///
+/// Every character is passed through Complement(), then the string is
+/// reversed.
+///
+/// \param[in,out] seq  sequence to modify
+inline void ReverseComplement(std::string& seq) {
+
+    for (char& base : seq)
+        base = Complement(base);
+    std::reverse(seq.begin(), seq.end());
+}
+
+/// \brief Reverse-complements \p seq when \p reverse is true, then returns it.
+///
+/// \param[in] seq      sequence, taken as an rvalue
+/// \param[in] reverse  whether to reverse-complement
+/// \returns the (possibly reverse-complemented) sequence
+inline std::string MaybeReverseComplement(std::string&& seq, bool reverse)
+{
+    if (reverse) ReverseComplement(seq);
+
+    // seq is a named rvalue reference: without std::move it would be copied
+    // into the return value
+    return std::move(seq);
+}
+
+/// \brief Reverse complements a DNA sequence in place, preserving case.
+///
+/// A, C, G, T (and U, which maps to A) are complemented in the case they
+/// were given in; 'N', ' ', '*' and '-' map to themselves. Every other
+/// character, including any byte outside 7-bit ASCII, becomes the
+/// placeholder value 4.
+///
+/// \param[in,out] seq  sequence to modify
+inline void ReverseComplementCaseSens(std::string& seq)
+{
+    // complement for each 7-bit ASCII code; built once
+    static const char rcTable[128] = {
+        4,  4,   4,   4,  4,   4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+        4,  4,   4,   4,  4,   4,  4,  4,  4,  4,  4,  4,  4,  32, 4,  4,  4,  4,  4,
+        4,  4,   4,   4,  42,  4,  4,  45, 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+        4,  4,   4,   4,  4,   4,  4,  4,  84, 4,  71, 4,  4,  4,  67, 4,  4,  4,  4,
+        4,  4,   78,  4,  4,   4,  4,  4,  65, 65, 4,  4,  4,  4,  4,  4,  4,  4,  4,
+        4,  4,   116, 4,  103, 4,  4,  4,  99, 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+        4,  4,   97,  97, 4,   4,  4,  4,  4,  4,  4,  4,  4,  4};
+
+    const std::string original = seq;
+    const std::string::size_type length = original.length();
+    std::string reverseCompl(length, 'N');
+    for (std::string::size_type i = 0; i < length; ++i) {
+        // index through unsigned char: a plain (signed) char would turn
+        // bytes >= 0x80 into negative, out-of-bounds indexes
+        const unsigned char base = static_cast<unsigned char>(original[i]);
+        reverseCompl[length-i-1] = (base < 128) ? rcTable[base]
+                                                : static_cast<char>(4);
+    }
+    seq = reverseCompl;
+}
+
+/// \brief Case-preserving reverse complement of \p seq when \p reverse is
+///        true; returns the sequence either way.
+///
+/// \param[in] seq      sequence, taken as an rvalue
+/// \param[in] reverse  whether to reverse-complement
+/// \returns the (possibly reverse-complemented) sequence
+inline std::string MaybeReverseComplementCaseSens(std::string&& seq, bool reverse)
+{
+    if (reverse) ReverseComplementCaseSens(seq);
+
+    // seq is a named rvalue reference: without std::move it would be copied
+    // into the return value
+    return std::move(seq);
+}
+
+
+/// \brief Returns the reverse complement of \p input; the argument is not
+///        modified.
+///
+/// \param[in] input  sequence to reverse-complement
+/// \returns reverse-complemented copy of \p input
+inline std::string ReverseComplemented(const std::string& input)
+{
+    std::string revComp(input);
+    ReverseComplement(revComp);
+    return revComp;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // SEQUENCEUTILS_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef STRINGUTILS_H
+#define STRINGUTILS_H
+
+#include <algorithm>
+#include <exception>
+#include <ios>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// Builds one tab-prefixed text field of the form "<TAB>tag:value".
+///
+/// \param tag    text placed before the colon
+/// \param value  text placed after the colon
+/// \returns the concatenation '\t' + tag + ':' + value
+inline std::string MakeSamTag(const std::string& tag,
+                              const std::string& value)
+{
+    std::string field;
+    field.reserve(tag.size() + value.size() + 2); // +2: leading tab and the colon
+    field += '\t';
+    field += tag;
+    field += ':';
+    field += value;
+    return field;
+}
+
+/// Splits \p line into tokens separated by \p delim.
+///
+/// Consecutive delimiters yield empty tokens. A delimiter at the very end of
+/// the line does NOT yield a trailing empty token, and an empty line yields no
+/// tokens at all.
+///
+/// \param line   text to split
+/// \param delim  separator character (tab by default)
+/// \returns the tokens, in order of appearance
+inline std::vector<std::string> Split(const std::string& line,
+                                      const char delim = '\t')
+{
+    std::vector<std::string> fields;
+    std::string::size_type begin = 0;
+
+    // stop as soon as nothing is left after the last delimiter
+    while (begin < line.size()) {
+        const std::string::size_type end = line.find(delim, begin);
+        if (end == std::string::npos) {
+            // final token: runs to the end of the line
+            fields.push_back(line.substr(begin));
+            break;
+        }
+        fields.push_back(line.substr(begin, end - begin));
+        begin = end + 1;
+    }
+    return fields;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // STRINGUTILS_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file SubreadLengthQuery.cpp
+/// \brief Implements the SubreadLengthQuery class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/SubreadLengthQuery.h"
+#include "pbbam/PbiFilterTypes.h"
+#include "pbbam/CompositeBamReader.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// Private implementation for SubreadLengthQuery: owns the filtering reader
+// that the public query object forwards to.
+struct SubreadLengthQuery::SubreadLengthQueryPrivate
+{
+ // Builds the reader from a PbiQueryLengthFilter(length, compareType) applied
+ // to the given dataset.
+ SubreadLengthQueryPrivate(const int32_t length,
+ const Compare::Type compareType,
+ const DataSet& dataset)
+ : reader_(PbiQueryLengthFilter(length, compareType), dataset)
+ { }
+
+ PbiFilterCompositeBamReader<Compare::None> reader_; // unsorted
+};
+
+// Constructs the query; all arguments are forwarded unchanged to the private
+// implementation object, which sets up the filtering reader.
+SubreadLengthQuery::SubreadLengthQuery(const int32_t length,
+ const Compare::Type compareType,
+ const DataSet& dataset)
+ : internal::IQuery()
+ , d_(new SubreadLengthQueryPrivate(length, compareType, dataset))
+{ }
+
+// Defined out-of-line here, where SubreadLengthQueryPrivate is a complete type.
+SubreadLengthQuery::~SubreadLengthQuery(void) { }
+
+// Fetches the next record by delegating to the underlying reader's GetNext();
+// the reader's return value is passed through unchanged.
+bool SubreadLengthQuery::GetNext(BamRecord &r)
+{ return d_->reader_.GetNext(r); }
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Tag.cpp
+/// \brief Defines the Tag class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Tag.h"
+#include <stdexcept>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Value constructors: each stores its argument in data_ and sets the modifier
+// to TagModifier::NONE. The default constructor value-initializes data_.
+Tag::Tag(void) : data_(), modifier_(TagModifier::NONE) { }
+Tag::Tag(int8_t value) : data_(value), modifier_(TagModifier::NONE) { }
+Tag::Tag(uint8_t value) : data_(value), modifier_(TagModifier::NONE) { }
+Tag::Tag(int16_t value) : data_(value), modifier_(TagModifier::NONE) { }
+Tag::Tag(uint16_t value) : data_(value), modifier_(TagModifier::NONE) { }
+Tag::Tag(int32_t value) : data_(value), modifier_(TagModifier::NONE) { }
+Tag::Tag(uint32_t value) : data_(value), modifier_(TagModifier::NONE) { }
+Tag::Tag(float value) : data_(value), modifier_(TagModifier::NONE) { }
+Tag::Tag(const std::string& value) : data_(value), modifier_(TagModifier::NONE) { }
+Tag::Tag(const vector<int8_t>& value) : data_(value), modifier_(TagModifier::NONE) { }
+Tag::Tag(const vector<uint8_t>& value) : data_(value), modifier_(TagModifier::NONE) { }
+Tag::Tag(const vector<int16_t>& value) : data_(value), modifier_(TagModifier::NONE) { }
+Tag::Tag(const vector<uint16_t>& value) : data_(value), modifier_(TagModifier::NONE) { }
+Tag::Tag(const vector<int32_t>& value) : data_(value), modifier_(TagModifier::NONE) { }
+Tag::Tag(const vector<uint32_t>& value) : data_(value), modifier_(TagModifier::NONE) { }
+Tag::Tag(const vector<float>& value) : data_(value), modifier_(TagModifier::NONE) { }
+
+// Constructs an int8_t tag with an explicit modifier.
+// Throws std::runtime_error if the modifier is HEX_STRING; any other modifier
+// value is stored as given.
+Tag::Tag(int8_t value, const TagModifier mod)
+ : data_(value)
+ , modifier_(mod)
+{
+ if (mod == TagModifier::HEX_STRING)
+ throw runtime_error("HEX_STRING is not a valid tag modifier for int8_t data. "
+ "It is intended for string-type data only.");
+}
+
+// Constructs a string tag with an explicit modifier.
+// Throws std::runtime_error if the modifier is ASCII_CHAR; any other modifier
+// value is stored as given.
+Tag::Tag(const std::string& value, const TagModifier mod)
+ : data_(value)
+ , modifier_(mod)
+{
+ if (mod == TagModifier::ASCII_CHAR)
+ throw runtime_error("ASCII_CHAR is not a valid tag modifier for string-type data. "
+ "To construct an ASCII char tag, use a single-quoted value (e.g. 'X' instead of \"X\")");
+}
+
+// Copy constructor: duplicates both the stored value and the modifier.
+Tag::Tag(const Tag& other)
+ : data_(other.data_)
+ , modifier_(other.modifier_)
+{ }
+
+// Move constructor: takes over the stored value and the modifier of 'other'.
+Tag::Tag(Tag&& other)
+ : data_(std::move(other.data_))
+ , modifier_(std::move(other.modifier_))
+{ }
+
+// Nothing to release explicitly; members clean up after themselves.
+Tag::~Tag(void) { }
+
+// Value assignment operators: each replaces the stored value (data_) and
+// returns *this. Only data_ is written; modifier_ keeps whatever value it had
+// before the assignment.
+// NOTE(review): a modifier set for the previous value type therefore survives
+// a change of value type - confirm this is intended.
+Tag& Tag::operator=(boost::blank value) { data_ = value; return *this; }
+Tag& Tag::operator=(int8_t value) { data_ = value; return *this; }
+Tag& Tag::operator=(uint8_t value) { data_ = value; return *this; }
+Tag& Tag::operator=(int16_t value) { data_ = value; return *this; }
+Tag& Tag::operator=(uint16_t value) { data_ = value; return *this; }
+Tag& Tag::operator=(int32_t value) { data_ = value; return *this; }
+Tag& Tag::operator=(uint32_t value) { data_ = value; return *this; }
+Tag& Tag::operator=(float value) { data_ = value; return *this; }
+Tag& Tag::operator=(const std::string& value) { data_ = value; return *this; }
+Tag& Tag::operator=(const vector<int8_t>& value) { data_ = value; return *this; }
+Tag& Tag::operator=(const vector<uint8_t>& value) { data_ = value; return *this; }
+Tag& Tag::operator=(const vector<int16_t>& value) { data_ = value; return *this; }
+Tag& Tag::operator=(const vector<uint16_t>& value) { data_ = value; return *this; }
+Tag& Tag::operator=(const vector<int32_t>& value) { data_ = value; return *this; }
+Tag& Tag::operator=(const vector<uint32_t>& value) { data_ = value; return *this; }
+Tag& Tag::operator=(const vector<float>& value) { data_ = value; return *this; }
+
+// Copy assignment: copies both the stored value and the modifier from 'other'.
+Tag& Tag::operator=(const Tag& other)
+{
+ data_ = other.data_;
+ modifier_ = other.modifier_;
+ return *this;
+}
+
+// Move assignment: takes over the stored value and the modifier of 'other'.
+Tag& Tag::operator=(Tag&& other)
+{
+ data_ = std::move(other.data_);
+ modifier_ = std::move(other.modifier_);
+ return *this;
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file TagCollection.cpp
+/// \brief Implements the TagCollection class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/TagCollection.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Returns true if the collection's count() for 'name' is non-zero.
+// NOTE(review): count() is presumably inherited from an associative container
+// base class declared in TagCollection.h - confirm there.
+bool TagCollection::Contains(const string& name) const
+{
+ return count(name) != 0;
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef TIMEUTILS_H
+#define TIMEUTILS_H
+
+#include <chrono>
+#include <stdexcept>
+#include <string>
+#include <cassert>
+#include <ctime>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// Formats a time point as an ISO 8601 UTC timestamp.
+///
+/// Output has the form "YYYY-MM-DDThh:mm:ss[.mmm]Z". The fractional part is
+/// omitted when the millisecond remainder is not positive; otherwise it is
+/// always written with three digits (zero-padded), so that 5 ms is rendered as
+/// ".005" and cannot be misread as 500 ms.
+///
+/// \param tp  time point to format
+/// \returns the formatted timestamp
+/// \throws std::runtime_error if the time cannot be converted to calendar time
+///
+/// \note Relies on gmtime(), whose result lives in shared static storage, so
+///       this function may not be thread-safe.
+inline
+std::string ToIso8601(const std::chrono::system_clock::time_point& tp)
+{
+    // get time info: whole seconds plus the millisecond remainder
+    const time_t ttime_t = std::chrono::system_clock::to_time_t(tp);
+    const std::chrono::system_clock::time_point tp_sec = std::chrono::system_clock::from_time_t(ttime_t);
+    const std::chrono::milliseconds ms = std::chrono::duration_cast<std::chrono::milliseconds>(tp - tp_sec);
+    const std::tm* ttm = gmtime(&ttime_t); // static obj, no free needed (may not be thread-safe though)
+    if (ttm == nullptr)
+        throw std::runtime_error("could not convert time point to calendar time");
+
+    // format output
+    char date_time_format[] = "%FT%T";
+    char date_time_str[50];
+    strftime(date_time_str, sizeof(date_time_str), date_time_format, ttm);
+    std::string result(date_time_str);
+    if (ms.count() > 0) {
+        // left-pad to three digits: ".5" would mean 500 ms, not 5 ms
+        const std::string digits = std::to_string(ms.count());
+        result.append(".");
+        if (digits.size() < 3)
+            result.append(3 - digits.size(), '0');
+        result.append(digits);
+    }
+    result.append("Z");
+    return result;
+}
+
+/// Formats a time point (UTC) as "YYMMDD_hhmmss", followed by the millisecond
+/// remainder when that remainder is positive.
+///
+/// \param tp  time point to format
+/// \returns the formatted timestamp
+///
+/// \note Relies on gmtime(), whose result lives in shared static storage, so
+///       this function may not be thread-safe.
+/// \note NOTE(review): the millisecond suffix is appended without zero-padding
+///       (5 ms adds "5", not "005") - confirm that consumers expect this.
+inline
+std::string ToDataSetFormat(const std::chrono::system_clock::time_point& tp)
+{
+    using std::chrono::system_clock;
+
+    // whole seconds, and whatever is left over in milliseconds
+    const time_t wholeSeconds = system_clock::to_time_t(tp);
+    const auto remainder = std::chrono::duration_cast<std::chrono::milliseconds>(
+        tp - system_clock::from_time_t(wholeSeconds));
+    const std::tm* utc = gmtime(&wholeSeconds);
+
+    char buffer[50];
+    strftime(buffer, sizeof(buffer), "%y%m%d_%H%M%S", utc);
+
+    const std::string formatted(buffer);
+    if (remainder.count() <= 0)
+        return formatted;
+    return formatted + std::to_string(remainder.count());
+}
+
+/// Returns the current time as reported by std::chrono::system_clock::now().
+inline
+std::chrono::system_clock::time_point CurrentTime(void)
+{ return std::chrono::system_clock::now(); }
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // TIMEUTILS_H
--- /dev/null
+// Copyright (c) 2014, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+//#include "pbbam/UnmappedReadsQuery.h"
+//#include "pbbam/BamFile.h"
+//#include "MemoryUtils.h"
+
+//#include <iostream>
+
+//using namespace PacBio;
+//using namespace PacBio::BAM;
+//using namespace std;
+
+//UnmappedReadsQuery::UnmappedReadsQuery(const BamFile& file)
+// : QueryBase()
+//{
+// // open file
+// file_.reset(sam_open(file.Filename().c_str(), "rb"), internal::HtslibFileDeleter());
+// if (!file_) {
+// error_ = UnmappedReadsQuery::FileOpenError;
+// return;
+// }
+
+// // open index
+// index_.reset(bam_index_load(file.Filename().c_str()), internal::HtslibIndexDeleter());
+// if (!index_) {
+// error_ = UnmappedReadsQuery::IndexFileOpenError;
+// return;
+// }
+
+// // initialize query
+// iterator_.reset(bam_itr_queryi(index_.get(), HTS_IDX_NOCOOR, 0, 0), internal::HtslibIteratorDeleter());
+// if (iterator_) {
+
+// cerr << endl
+// << "UnmappedQueryReads::iterator" << endl
+// << "read_rest: " << iterator_->read_rest << endl
+// << "finished: " << iterator_->finished << endl
+// << "dummy: " << iterator_->dummy << endl
+// << "tid: " << iterator_->tid << endl
+// << "beg: " << iterator_->beg << endl
+// << "end: " << iterator_->end << endl
+// << "n_off: " << iterator_->n_off << endl
+// << "i: " << iterator_->i << endl
+// << "curr_off: " << iterator_->curr_off << endl
+// << endl;
+
+
+//// uint32_t read_rest:1, finished:1, dummy:29;
+//// int tid, beg, end, n_off, i;
+//// uint64_t curr_off;
+//// hts_pair64_t *off;
+//// hts_readrec_func *readrec;
+//// struct {
+//// int n, m;
+//// int *a;
+//// } bins;
+
+// }
+//}
+
+//bool UnmappedReadsQuery::GetNext(BamRecord& record)
+//{
+// if (error_ == UnmappedReadsQuery::NoError && iterator_) {
+// const int result = bam_itr_next(file_.get(), iterator_.get(), record.RawData().get());
+// if ( result > 0 )
+// return true;
+// else {
+// cerr << "ERROR - result: " << result << endl;
+// if ( result == -4 ) {
+
+// bam1_t* b = record.RawData().get();
+// bam1_core_t* c = &b->core;
+// bool nonBgzfErrorFound = false;
+
+// if (b->l_data < 0) {
+// cerr << "ERROR: bam1_t::l_data < 0" << endl;
+// nonBgzfErrorFound = true;
+// }
+// if (c->l_qseq < 0) {
+// cerr << "ERROR: bam1_t::core::l_qseq < 0" << endl;
+// nonBgzfErrorFound = true;
+// }
+// if (!b->data) {
+// cerr << "ERROR: bam1_t::data is null" << endl;
+// nonBgzfErrorFound = true;
+// }
+// if (!nonBgzfErrorFound)
+// cerr << "ERROR: in bam_read1(), bgzf_read(fp, b->data, b->l_data) returned unexpected value" << endl;
+// }
+// }
+// }
+// else {
+// cerr << "UnmappedReadsQuery::HasError() - " << Error() << endl;
+// }
+
+
+// return false;
+//}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ValidationErrors.cpp
+/// \brief Implements the ValidationErrors class.
+//
+// Author: Derek Barnett
+
+#include "ValidationErrors.h"
+#include "pbbam/exception/ValidationException.h"
+#include "StringUtils.h"
+#include <sstream>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// Namespace-scope definition of the static member; its value comes from the
+// in-class initializer in the class declaration.
+const size_t ValidationErrors::MAX;
+
+// Creates an empty error collector.
+// maxNumErrors is the number of recorded errors at which the collector throws
+// on its own; a value of 0 is treated as ValidationErrors::MAX.
+ValidationErrors::ValidationErrors(const size_t maxNumErrors)
+    : maxNumErrors_(maxNumErrors == 0 ? ValidationErrors::MAX : maxNumErrors)
+    , currentNumErrors_(0)
+{ }
+
+// Records an error for file 'fn' (copying overload).
+void ValidationErrors::AddFileError(const std::string& fn,
+                                    const std::string& details)
+{
+    // pass a temporary copy on to the rvalue overload, which stores it
+    AddFileError(fn, std::string(details));
+}
+
+// Records an error for file 'fn', taking ownership of the message, then
+// updates the running error count (which may throw once the limit is hit).
+void ValidationErrors::AddFileError(const std::string& fn,
+                                    std::string&& details)
+{
+    ErrorList& errorsForFile = fileErrors_[fn]; // created on first use
+    errorsForFile.push_back(std::move(details));
+    OnErrorAdded();
+}
+
+// Records an error for read group 'rg' (copying overload).
+void ValidationErrors::AddReadGroupError(const std::string& rg,
+                                         const std::string& details)
+{
+    // pass a temporary copy on to the rvalue overload, which stores it
+    AddReadGroupError(rg, std::string(details));
+}
+
+// Records an error for read group 'rg', taking ownership of the message, then
+// updates the running error count (which may throw once the limit is hit).
+void ValidationErrors::AddReadGroupError(const std::string& rg,
+                                         std::string&& details)
+{
+    ErrorList& errorsForGroup = readGroupErrors_[rg]; // created on first use
+    errorsForGroup.push_back(std::move(details));
+    OnErrorAdded();
+}
+
+// Records an error for the record called 'name' (copying overload).
+void ValidationErrors::AddRecordError(const std::string& name,
+                                      const std::string& details)
+{
+    // pass a temporary copy on to the rvalue overload, which stores it
+    AddRecordError(name, std::string(details));
+}
+
+// Records an error for the record called 'name', taking ownership of the
+// message, then updates the running error count (which may throw once the
+// limit is hit).
+void ValidationErrors::AddRecordError(const std::string& name,
+                                      std::string&& details)
+{
+    ErrorList& errorsForRecord = recordErrors_[name]; // created on first use
+    errorsForRecord.push_back(std::move(details));
+    OnErrorAdded();
+}
+
+// Records a tag-length mismatch for record 'name' (copying overload).
+void ValidationErrors::AddTagLengthError(const string& name,
+                                         const string& tagLabel,
+                                         const string& tagName,
+                                         const size_t observed,
+                                         const size_t expected)
+{
+    // temporaries bind to the rvalue overload, which does the formatting
+    AddTagLengthError(name,
+                      string(tagLabel),
+                      string(tagName),
+                      observed,
+                      expected);
+}
+
+// Formats a tag-length mismatch message and records it as a record error for
+// 'name'. The message reads:
+//   "<tagLabel> tag (<tagName>) length: <observed>, does not match expected length: <expected>"
+void ValidationErrors::AddTagLengthError(const string& name,
+                                         string&& tagLabel,
+                                         string&& tagName,
+                                         const size_t observed,
+                                         const size_t expected)
+{
+    ostringstream message;
+    message << tagLabel << " tag (" << tagName << ") length: " << observed;
+    message << ", does not match expected length: " << expected;
+
+    // str() yields a temporary, so the rvalue AddRecordError overload is used
+    AddRecordError(name, message.str());
+}
+
+// Returns true while no error has been recorded (the running count is zero).
+bool ValidationErrors::IsEmpty(void) const
+{
+ return currentNumErrors_ == 0;
+}
+
+// Bookkeeping hook run after every recorded error: bumps the running count
+// and throws the accumulated errors once the count equals the configured
+// maximum.
+void ValidationErrors::OnErrorAdded(void)
+{
+    const bool limitReached = (++currentNumErrors_ == maxNumErrors_);
+    if (limitReached)
+        ThrowErrors();
+}
+
+// Throws a ValidationException built from the three error maps collected so
+// far. The maps are moved into the exception rather than copied.
+// NOTE(review): currentNumErrors_ is not reset here, so IsEmpty() keeps
+// returning false afterwards even though the maps have been moved from -
+// confirm that the object is never reused after this call.
+void ValidationErrors::ThrowErrors(void)
+{
+ throw ValidationException(std::move(fileErrors_),
+ std::move(readGroupErrors_),
+ std::move(recordErrors_));
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ValidationErrors.h
+/// \brief Defines the ValidationErrors class.
+//
+// Author: Derek Barnett
+
+#ifndef VALIDATIONERRORS_H
+#define VALIDATIONERRORS_H
+
+#include <limits>
+#include <map>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// The ValidationErrors class collects error messages accumulated during
+/// validation (see Validator).
+///
+/// Convenience methods are provided for different BAM components, to help
+/// format the displayed output.
+///
+/// A maximum number of errors can be provided at construction, and this class
+/// will automatically throw a ValidationException as soon as that count is
+/// reached. Otherwise, the caller is expected to check IsEmpty() once
+/// validation is done and call ThrowErrors() if any errors were recorded
+/// (i.e. if IsEmpty() returns false).
+///
+class ValidationErrors
+{
+public:
+ /// Messages recorded for one file, read group, or record.
+ typedef std::vector<std::string> ErrorList;
+ /// Maps a file name, read group ID, or record name to its messages.
+ typedef std::map<std::string, ErrorList> ErrorMap;
+public:
+ /// Largest representable error count; used as the default limit.
+ static const size_t MAX = std::numeric_limits<size_t>::max();
+
+public:
+ /// \param maxNumErrors  number of errors at which the collector throws on
+ ///                      its own; 0 is treated as ValidationErrors::MAX
+ ValidationErrors(const size_t maxNumErrors = ValidationErrors::MAX);
+
+public:
+ /// Record an error message against file \p fn.
+ void AddFileError(const std::string& fn, const std::string& details);
+ void AddFileError(const std::string& fn, std::string&& details);
+
+ /// Record an error message against read group \p rg.
+ void AddReadGroupError(const std::string& rg, const std::string& details);
+ void AddReadGroupError(const std::string& rg, std::string&& details);
+
+ /// Record an error message against the record called \p name.
+ void AddRecordError(const std::string& name, const std::string& details);
+ void AddRecordError(const std::string& name, std::string&& details);
+
+ /// Record a formatted "observed vs. expected tag length" message against
+ /// the record called \p name.
+ void AddTagLengthError(const std::string& name,
+ const std::string& tagLabel,
+ const std::string& tagName,
+ const size_t observed,
+ const size_t expected);
+ void AddTagLengthError(const std::string& name,
+ std::string&& tagLabel,
+ std::string&& tagName,
+ const size_t observed,
+ const size_t expected);
+
+public:
+ /// \returns true if no error has been recorded so far
+ bool IsEmpty(void) const;
+ /// Throws a ValidationException carrying all errors recorded so far.
+ void ThrowErrors(void);
+
+private:
+ size_t maxNumErrors_; // error count that triggers an automatic throw
+ size_t currentNumErrors_; // number of errors recorded so far
+ ErrorMap fileErrors_; // keyed by file name
+ ErrorMap readGroupErrors_; // keyed by read group
+ ErrorMap recordErrors_; // keyed by record name
+
+private:
+ // Called after every recorded error; counts it and enforces the limit.
+ void OnErrorAdded(void);
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // VALIDATIONERRORS_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ValidationException.cpp
+/// \brief Implements the ValidationException class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/exception/ValidationException.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+// Copying constructor: stores copies of the three error maps, then builds the
+// text returned by what(). The std::runtime_error base is given an empty
+// message because what() is overridden to return msg_ instead.
+ValidationException::ValidationException(const ErrorMap& fileErrors,
+ const ErrorMap& readGroupErrors,
+ const ErrorMap& recordErrors)
+ : std::runtime_error("")
+ , fileErrors_(fileErrors)
+ , readGroupErrors_(readGroupErrors)
+ , recordErrors_(recordErrors)
+{
+ FormatMessage();
+}
+
+// Moving constructor: takes ownership of the three error maps, then builds the
+// text returned by what(). The std::runtime_error base is given an empty
+// message because what() is overridden to return msg_ instead.
+ValidationException::ValidationException(ErrorMap&& fileErrors,
+ ErrorMap&& readGroupErrors,
+ ErrorMap&& recordErrors)
+ : std::runtime_error("")
+ , fileErrors_(std::move(fileErrors))
+ , readGroupErrors_(std::move(readGroupErrors))
+ , recordErrors_(std::move(recordErrors))
+{
+ FormatMessage();
+}
+
+// Read-only access to the stored file-level errors.
+const ValidationException::ErrorMap& ValidationException::FileErrors(void) const
+{ return fileErrors_; }
+
+// Read-only access to the stored read-group-level errors.
+const ValidationException::ErrorMap& ValidationException::ReadGroupErrors(void) const
+{ return readGroupErrors_; }
+
+// Read-only access to the stored record-level errors.
+const ValidationException::ErrorMap& ValidationException::RecordErrors(void) const
+{ return recordErrors_; }
+
+// Returns the message assembled by FormatMessage(). The pointer refers to
+// msg_'s internal buffer, so it is only valid while this exception object
+// is alive.
+const char* ValidationException::what(void) const noexcept
+{ return msg_.c_str(); }
+
+// Assembles the human-readable report returned by what() and stores it in
+// msg_. The report starts with a "Validation failed: " line, followed by the
+// file, read group and record errors (in that order); each key gets a header
+// line and each of its messages a line of its own.
+void ValidationException::FormatMessage(void)
+{
+    std::stringstream report;
+    report << "Validation failed: " << std::endl;
+
+    // file errors
+    for (const auto& entry : fileErrors_) {
+        report << " In file (" << entry.first << ") : " << std::endl;
+        for (const auto& detail : entry.second)
+            report << " " << detail << std::endl;
+    }
+
+    // read group errors
+    for (const auto& entry : readGroupErrors_) {
+        report << " In read group (" << entry.first << ") : " << std::endl;
+        for (const auto& detail : entry.second)
+            report << " " << detail << std::endl;
+    }
+
+    // record errors
+    for (const auto& entry : recordErrors_) {
+        report << " In record (" << entry.first << ") : " << std::endl;
+        for (const auto& detail : entry.second)
+            report << " " << detail << std::endl;
+    }
+
+    msg_ = report.str();
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Validator.cpp
+/// \brief Implements the Validator class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/Validator.h"
+
+#include "pbbam/BamFile.h"
+#include "pbbam/BamHeader.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/EntireFileQuery.h"
+#include "pbbam/ReadGroupInfo.h"
+#include "ValidationErrors.h"
+#include "Version.h"
+#include <boost/algorithm/string.hpp>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <stdexcept>
+#include <set>
+#include <vector>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// Comparison functor that orders strings via boost::ilexicographical_compare,
+/// so it can be plugged into ordered containers such as std::set.
+struct ilexcompare_wrapper
+{
+    bool operator()(const string& lhs, const string& rhs) const
+    {
+        return boost::ilexicographical_compare(lhs, rhs);
+    }
+};
+
+/// Sort-order values that header validation accepts. Lookups go through
+/// ilexcompare_wrapper rather than the default string ordering.
+static const set<string, ilexcompare_wrapper> AcceptedSortOrders = {
+    "coordinate",
+    "queryname",
+    "unknown",
+    "unsorted"
+};
+
+/// Read-type names that read group validation accepts (exact, case-sensitive
+/// match - this set uses the default string ordering).
+static const set<string> AcceptedReadTypes = {
+    "CCS",
+    "HQREGION",
+    "POLYMERASE",
+    "SCRAP",
+    "SUBREAD",
+    "UNKNOWN"
+};
+
+/// \brief Checks a single read group and records every problem found.
+///
+/// Verifies that the ID, movie name and the description components are
+/// present, that the stored ID equals the one computed by MakeReadGroupId(),
+/// that the read type is one of AcceptedReadTypes, that the sequencing
+/// chemistry lookup succeeds, and that the frame rate parses as a float.
+/// All findings are filed under the read group's ID.
+///
+/// \param[in]     rg      read group to inspect
+/// \param[in,out] errors  collector receiving any problems found
+///
+static
+void ValidateReadGroup(const ReadGroupInfo& rg,
+                       unique_ptr<ValidationErrors>& errors)
+{
+    const string& id = rg.Id();
+    const auto& movieName         = rg.MovieName();
+    const auto& readType          = rg.ReadType();
+    const auto& bindingKit        = rg.BindingKit();
+    const auto& sequencingKit     = rg.SequencingKit();
+    const auto& basecallerVersion = rg.BasecallerVersion();
+    const auto& frameRateHz       = rg.FrameRateHz();
+
+    const bool hasId       = !id.empty();
+    const bool hasReadType = !readType.empty();
+    const bool hasBinding  = !bindingKit.empty();
+    const bool hasSeqKit   = !sequencingKit.empty();
+    const bool hasBaseVer  = !basecallerVersion.empty();
+    const bool hasFrameHz  = !frameRateHz.empty();
+
+    // required fields
+    if (!hasId)
+        errors->AddReadGroupError(id, "missing ID");
+    if (movieName.empty())
+        errors->AddReadGroupError(id, "missing movie name (PU tag)");
+    // 3.0.2 adds required RG:PM - not checked yet; version-aware validation
+    // is planned for later
+
+    // required components of the description tag
+    if (!hasReadType)
+        errors->AddReadGroupError(id, "missing READTYPE in description");
+    if (!hasBinding)
+        errors->AddReadGroupError(id, "missing BINDINGKIT in description");
+    if (!hasSeqKit)
+        errors->AddReadGroupError(id, "missing SEQUENCINGKIT in description");
+    if (!hasBaseVer)
+        errors->AddReadGroupError(id, "missing BASECALLERVERSION in description");
+    if (!hasFrameHz)
+        errors->AddReadGroupError(id, "missing FRAMERATEHZ in description");
+
+    // stored ID must agree with the ID derived from movie name & read type
+    if (hasId) {
+        const auto computedId = MakeReadGroupId(movieName, readType);
+        if (computedId != id) {
+            errors->AddReadGroupError(id, "stored ID: " + id +
+                                          " does not match computed ID: " + computedId);
+        }
+    }
+
+    // read type must be a recognized one
+    if (hasReadType &&
+        internal::AcceptedReadTypes.count(readType) == 0)
+    {
+        errors->AddReadGroupError(id, "read type: " + readType + " is unknown");
+    }
+
+    // chemistry lookup (binding kit, sequencing kit, basecaller version):
+    // only the success of the call matters, the result is discarded
+    if (hasBinding && hasSeqKit && hasBaseVer) {
+        try {
+            (void)rg.SequencingChemistry();
+        } catch (const std::exception& e) {
+            errors->AddReadGroupError(id, e.what());
+        }
+    }
+
+    // frame rate must be convertible to floating point
+    if (hasFrameHz) {
+        try {
+            (void)stof(frameRateHz);
+        } catch (const std::exception& e) {
+            errors->AddReadGroupError(id, e.what());
+        }
+    }
+}
+
+/// \brief Checks header-level metadata and every read group in the header.
+///
+/// In order: the SAM version must parse as a Version, the sort order must be
+/// one of AcceptedSortOrders, the PacBioBAM version must parse and be at
+/// least 3.0.1, and each read group is passed to ValidateReadGroup().
+///
+/// \param[in]     header    header to inspect
+/// \param[in]     filename  name under which file-level errors are filed
+/// \param[in,out] errors    collector receiving any problems found
+///
+static
+void ValidateHeader(const BamHeader& header,
+                    const string& filename,
+                    unique_ptr<ValidationErrors>& errors)
+{
+    // SAM/BAM version - constructing a Version is the actual check
+    try {
+        const Version samVersion(header.Version());
+        (void)samVersion;
+    } catch (const std::exception& e) {
+        errors->AddFileError(filename, string("SAM version (@HD:VN) failed: ") + e.what());
+    }
+
+    // sort order
+    const string sortOrder = header.SortOrder();
+    if (AcceptedSortOrders.count(sortOrder) == 0)
+        errors->AddFileError(filename, string("unknown sort order: ") + sortOrder);
+
+    // PacBio version
+    // NOTE(review): the "too old" error is added inside the try block, so an
+    // exception escaping AddFileError() would also land in the handler below.
+    try {
+        const Version found(header.PacBioBamVersion());
+        const Version minimumSupported(3,0,1);
+        if (found < minimumSupported) {
+            string msg = "PacBioBAM version (@HD:pb) " + found.ToString() +
+                         " is older than the minimum supported version " +
+                         "(" + minimumSupported.ToString() + ")";
+            errors->AddFileError(filename, std::move(msg));
+        }
+    } catch (const std::exception& e) {
+        errors->AddFileError(filename, string("PacBioBAM version (@HD:pb) failed to parse: ") + e.what());
+    }
+
+    // sequences are not validated (yet)
+
+    // read groups
+    for (const ReadGroupInfo& rg : header.ReadGroups())
+        ValidateReadGroup(rg, errors);
+}
+
+/// \brief Checks file-level properties of a BAM file, then its header.
+///
+/// Streamed input (filename "-") cannot be validated: an error is recorded
+/// and ThrowErrors() is invoked immediately. Otherwise the file name
+/// extension, the end-of-file marker and the presence of a PBI index are
+/// checked, followed by ValidateHeader().
+///
+/// \param[in]     file    BAM file to inspect
+/// \param[in,out] errors  collector receiving any problems found
+///
+static
+void ValidateMetadata(const BamFile& file,
+                      unique_ptr<ValidationErrors>& errors)
+{
+    // filename
+    const string fn = file.Filename();
+    if (fn == "-") {
+        errors->AddFileError(fn, "validation not is available for streamed BAM. Please "
+                                 "write to a file and run validation on it.");
+        errors->ThrowErrors(); // quit early
+    }
+
+    // Only ".bam" and ".bam.tmp" are standard extensions; anything else is
+    // reported. (The test used to be inverted and flagged the standard names.)
+    const bool hasStandardExtension = boost::algorithm::ends_with(fn, ".bam") ||
+                                      boost::algorithm::ends_with(fn, ".bam.tmp");
+    if (!hasStandardExtension)
+        errors->AddFileError(fn, "non-standard file extension");
+
+    // EOF
+    if (!file.HasEOF())
+        errors->AddFileError(fn, "missing end-of-file marker");
+
+    // has PBI
+    if (!file.PacBioIndexExists())
+        errors->AddFileError(fn, "missing PBI file");
+
+    // header
+    ValidateHeader(file.Header(), file.Filename(), errors);
+}
+
+/// \brief Checks the fields a mapped record must carry: a non-negative
+///        reference start and a non-negative reference ID.
+///
+/// \param[in]     b       record to inspect
+/// \param[in,out] errors  collector receiving any problems found
+///
+void ValidateMappedRecord(const BamRecord& b,
+                          unique_ptr<ValidationErrors>& errors)
+{
+    const string& recordName = b.FullName();
+
+    const bool positionOk = (b.ReferenceStart() >= 0);
+    if (!positionOk)
+        errors->AddRecordError(recordName, "mapped record position is invalid");
+
+    const bool referenceOk = (b.ReferenceId() >= 0);
+    if (!referenceOk)
+        errors->AddRecordError(recordName, "mapped record reference ID is invalid");
+
+    // further mapped-record checks may be added here
+}
+
+/// \brief Checks that a non-CCS record's query interval is non-empty
+///        (queryStart strictly less than queryEnd). CCS records are skipped.
+///
+/// \param[in]     b       record to inspect
+/// \param[in,out] errors  collector receiving any problems found
+///
+void ValidateRecordCore(const BamRecord& b,
+                        unique_ptr<ValidationErrors>& errors)
+{
+    const string& recordName = b.FullName();
+
+    if (b.Type() == RecordType::CCS)
+        return;
+
+    const auto queryStart = b.QueryStart();
+    const auto queryEnd   = b.QueryEnd();
+    if (queryEnd <= queryStart)
+        errors->AddRecordError(recordName, "queryStart (qs) should be < queryEnd (qe)");
+}
+
+/// \brief Checks that the record's read group can be retrieved.
+///
+/// The lookup result itself is discarded; only an exception thrown by
+/// BamRecord::ReadGroup() is turned into a record error.
+///
+/// \param[in]     b       record to inspect
+/// \param[in,out] errors  collector receiving any problems found
+///
+void ValidateRecordReadGroup(const BamRecord& b,
+                             unique_ptr<ValidationErrors>& errors)
+{
+    try {
+        (void)b.ReadGroup();
+    } catch (const std::exception& e) {
+        errors->AddRecordError(b.FullName(), e.what());
+    }
+}
+
+/// \brief Checks that the record carries the tags validation requires.
+///
+/// Non-CCS records need qs/qe (with qs < qe) and an np value of 1. Every
+/// record needs zm, np, rq and sn.
+///
+/// \param[in]     b       record to inspect
+/// \param[in,out] errors  collector receiving any problems found
+///
+void ValidateRecordRequiredTags(const BamRecord& b,
+                                unique_ptr<ValidationErrors>& errors)
+{
+    const string& recordName = b.FullName();
+    const bool isCcs = (b.Type() == RecordType::CCS);
+
+    // qs/qe - only meaningful for non-CCS records
+    if (!isCcs) {
+        const bool hasStart = b.HasQueryStart();
+        const bool hasEnd   = b.HasQueryEnd();
+
+        if (!hasStart)
+            errors->AddRecordError(recordName, "missing tag: qs (queryStart)");
+        if (!hasEnd)
+            errors->AddRecordError(recordName, "missing tag: qe (queryEnd)");
+
+        // the interval itself can only be judged when both ends are present
+        if (hasStart && hasEnd) {
+            const auto queryStart = b.QueryStart();
+            const auto queryEnd   = b.QueryEnd();
+            if (queryEnd <= queryStart)
+                errors->AddRecordError(recordName, "queryStart (qs) should be < queryEnd (qe)");
+        }
+    }
+
+    // zm
+    if (!b.HasHoleNumber())
+        errors->AddRecordError(recordName, "missing tag: zm (ZMW hole number)");
+
+    // np
+    if (b.HasNumPasses()) {
+        const auto numPasses = b.NumPasses();
+        if (!isCcs && numPasses != 1)
+            errors->AddRecordError(recordName, "np (numPasses) tag for non-CCS records should be 1");
+    } else {
+        errors->AddRecordError(recordName, "missing tag: np (num passes)");
+    }
+
+    // rq
+    if (!b.HasReadAccuracy())
+        errors->AddRecordError(recordName, "missing tag: rq (read accuracy)");
+
+    // sn
+    if (!b.HasSignalToNoise())
+        errors->AddRecordError(recordName, "missing tag: sn (signal-to-noise ratio)");
+}
+
+/// \brief Checks that per-base data all have the expected length.
+///
+/// The expected length is the sequence length for CCS records and
+/// (queryEnd - queryStart) otherwise. The sequence itself and each standard
+/// per-base tag that is present (dq, dt, iq, mq, sq, st, ip) are compared
+/// against it.
+///
+/// \param[in]     b       record to inspect
+/// \param[in,out] errors  collector receiving any problems found
+///
+void ValidateRecordTagLengths(const BamRecord& b,
+                              unique_ptr<ValidationErrors>& errors)
+{
+    const string& name = b.FullName();
+    const bool isCcs = (b.Type() == RecordType::CCS);
+    const size_t expectedLength = (isCcs ? b.Sequence().size()
+                                         : (b.QueryEnd() - b.QueryStart()));
+
+    // Files a tag-length error when the observed size deviates from expectedLength.
+    const auto checkLength = [&](const char* tagLabel,
+                                 const char* tagName,
+                                 const size_t observed)
+    {
+        if (observed != expectedLength)
+            errors->AddTagLengthError(name, tagLabel, tagName, observed, expectedLength);
+    };
+
+    // check "per-base"-type data lengths are compatible
+    if (b.Sequence().size() != expectedLength)
+        errors->AddRecordError(name, "sequence length does not match expected length");
+
+    if (b.HasDeletionQV())
+        checkLength("DeletionQV", "dq", b.DeletionQV().size());
+    if (b.HasDeletionTag())
+        checkLength("DeletionTag", "dt", b.DeletionTag().size());
+    if (b.HasInsertionQV())
+        checkLength("InsertionQV", "iq", b.InsertionQV().size());
+    if (b.HasMergeQV())
+        checkLength("MergeQV", "mq", b.MergeQV().size());
+    if (b.HasSubstitutionQV())
+        checkLength("SubstitutionQV", "sq", b.SubstitutionQV().size());
+    if (b.HasSubstitutionTag())
+        checkLength("SubstitutionTag", "st", b.SubstitutionTag().size());
+    if (b.HasIPD())
+        checkLength("IPD", "ip", b.IPD().size());
+
+    // NOTE: "internal" tag checks are disabled for now, only "standard"
+    //       PacBioBAM tags are checked. Disabled checks, kept for reference:
+    //
+//    if (b.HasAltLabelQV())     checkLength("AltLabelQV",     "pv", b.AltLabelQV().size());
+//    if (b.HasAltLabelTag())    checkLength("AltLabelTag",    "pt", b.AltLabelTag().size());
+//    if (b.HasLabelQV())        checkLength("LabelQV",        "pq", b.LabelQV().size());
+//    if (b.HasPkmean())         checkLength("Pkmean",         "pa", b.Pkmean().size());
+//    if (b.HasPkmean2())        checkLength("Pkmean2",        "ps", b.Pkmean2().size());
+//    if (b.HasPkmid())          checkLength("Pkmid",          "pm", b.Pkmid().size());
+//    if (b.HasPkmid2())         checkLength("Pkmid2",         "pi", b.Pkmid2().size());
+//    if (b.HasPrePulseFrames()) checkLength("PrePulseFrames", "pd", b.PrePulseFrames().size());
+//    if (b.HasPulseCall())      checkLength("PulseCall",      "pc", b.PulseCall().size());
+//    if (b.HasPulseCallWidth()) checkLength("PulseCallWidth", "px", b.PulseCallWidth().size());
+//    if (b.HasPulseMergeQV())   checkLength("PulseMergeQV",   "pg", b.PulseMergeQV().size());
+//    if (b.HasPulseWidth())     checkLength("PulseWidth",     "pw", b.PulseWidth().size());
+}
+
+/// \brief Checks that an unmapped record carries no alignment information:
+///        both reference start and reference ID must be -1.
+///
+/// \param[in]     b       record to inspect
+/// \param[in,out] errors  collector receiving any problems found
+///
+void ValidateUnmappedRecord(const BamRecord& b,
+                            unique_ptr<ValidationErrors>& errors)
+{
+    const string& recordName = b.FullName();
+
+    const bool hasPosition = (b.ReferenceStart() != -1);
+    if (hasPosition)
+        errors->AddRecordError(recordName, "unmapped record has a position");
+
+    const bool hasReference = (b.ReferenceId() != -1);
+    if (hasReference)
+        errors->AddRecordError(recordName, "unmapped record has a reference ID");
+}
+
+/// \brief Runs every record-level check on a single record.
+///
+/// Core, read group, required-tag and tag-length checks always run; the
+/// final check depends on whether the record reports itself as mapped.
+///
+/// \param[in]     b       record to inspect
+/// \param[in,out] errors  collector receiving any problems found
+///
+static
+void ValidateRecord(const BamRecord& b,
+                    unique_ptr<ValidationErrors>& errors)
+{
+    // checks common to all records
+    ValidateRecordCore(b, errors);
+    ValidateRecordReadGroup(b, errors);
+    ValidateRecordRequiredTags(b, errors);
+    ValidateRecordTagLengths(b, errors);
+
+    // mapping-state specific checks
+    if (!b.IsMapped()) {
+        ValidateUnmappedRecord(b, errors);
+        return;
+    }
+    ValidateMappedRecord(b, errors);
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+/// \brief Validates a header on its own (no file attached).
+///
+/// File-level findings are filed under the placeholder name "unknown". If
+/// anything was recorded, ThrowErrors() is invoked on the collector.
+///
+/// \param[in] header     header to validate
+/// \param[in] maxErrors  limit handed to the ValidationErrors collector
+///
+void Validator::Validate(const BamHeader& header, const size_t maxErrors)
+{
+    unique_ptr<ValidationErrors> errors(new ValidationErrors(maxErrors));
+    internal::ValidateHeader(header, "unknown", errors);
+
+    if (errors->IsEmpty())
+        return;
+    errors->ThrowErrors();
+}
+
+/// \brief Validates a single read group.
+///
+/// If anything was recorded, ThrowErrors() is invoked on the collector.
+///
+/// \param[in] rg         read group to validate
+/// \param[in] maxErrors  limit handed to the ValidationErrors collector
+///
+void Validator::Validate(const ReadGroupInfo& rg, const size_t maxErrors)
+{
+    unique_ptr<ValidationErrors> errors(new ValidationErrors(maxErrors));
+    internal::ValidateReadGroup(rg, errors);
+
+    if (errors->IsEmpty())
+        return;
+    errors->ThrowErrors();
+}
+
+/// \brief Validates a single record.
+///
+/// If anything was recorded, ThrowErrors() is invoked on the collector.
+///
+/// \param[in] b          record to validate
+/// \param[in] maxErrors  limit handed to the ValidationErrors collector
+///
+void Validator::Validate(const BamRecord& b, const size_t maxErrors)
+{
+    unique_ptr<ValidationErrors> errors(new ValidationErrors(maxErrors));
+    internal::ValidateRecord(b, errors);
+
+    if (errors->IsEmpty())
+        return;
+    errors->ThrowErrors();
+}
+
+/// \brief Validates a BAM file's metadata and then every record in it.
+///
+/// Records are visited through an EntireFileQuery. If anything was recorded
+/// by the end, ThrowErrors() is invoked on the collector.
+///
+/// \param[in] file       BAM file to validate
+/// \param[in] maxErrors  limit handed to the ValidationErrors collector
+///
+void Validator::ValidateEntireFile(const BamFile& file, const size_t maxErrors)
+{
+    unique_ptr<ValidationErrors> errors(new ValidationErrors(maxErrors));
+
+    // file-level and header checks first
+    internal::ValidateMetadata(file, errors);
+
+    // then each record in turn
+    EntireFileQuery query(file);
+    for (const BamRecord& record : query)
+        internal::ValidateRecord(record, errors);
+
+    if (errors->IsEmpty())
+        return;
+    errors->ThrowErrors();
+}
+
+/// \brief Validates only a BAM file's metadata (file-level properties and
+///        header); records are not read.
+///
+/// If anything was recorded, ThrowErrors() is invoked on the collector.
+///
+/// \param[in] file       BAM file to validate
+/// \param[in] maxErrors  limit handed to the ValidationErrors collector
+///
+void Validator::ValidateFileMetadata(const BamFile& file, const size_t maxErrors)
+{
+    unique_ptr<ValidationErrors> errors(new ValidationErrors(maxErrors));
+    internal::ValidateMetadata(file, errors);
+
+    if (errors->IsEmpty())
+        return;
+    errors->ThrowErrors();
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Version.cpp
+/// \brief Implements the Version class.
+//
+// Author: Derek Barnett
+
+#include "Version.h"
+#include "SequenceUtils.h"
+#include <sstream>
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// Named version constants: Current is 3.0.3, Minimum is 3.0.1.
+const Version Version::Current(3, 0, 3);
+const Version Version::Minimum(3, 0, 1);
+
+/// \brief Parses a version from a dot-separated string.
+///
+/// Expected form is "<major>.<minor>.<revision>". Only the first field is
+/// mandatory; a missing minor or revision stays 0, and fields beyond the
+/// third are ignored. Each field is converted with std::stoi.
+///
+/// \param[in] v  version string
+///
+/// \throws std::runtime_error if parsing fails (any failure inside the parse
+///         step, including the "no fields" case, is reported with the same
+///         "failed to parse" message) or if a component is negative
+///
+Version::Version(const std::string& v)
+    : major_(0)
+    , minor_(0)
+    , revision_(0)
+{
+    try {
+        const auto parts = internal::Split(v, '.');
+        const auto partCount = parts.size();
+
+        if (partCount == 0)
+            throw std::runtime_error("invalid version number - empty string");
+
+        major_ = std::stoi(parts.at(0));
+        if (partCount >= 2)
+            minor_ = std::stoi(parts.at(1));
+        if (partCount >= 3)
+            revision_ = std::stoi(parts.at(2));
+
+    } catch (std::exception&) {
+        // every failure above is collapsed into one uniform message
+        throw std::runtime_error("invalid version number (" + v + "): failed to parse");
+    }
+
+    // reject negative components
+    Check();
+}
+
+/// \returns the version formatted as "<major>.<minor>.<revision>"
+std::string Version::ToString() const
+{
+    std::stringstream formatted;
+    formatted << major_;
+    formatted << '.';
+    formatted << minor_;
+    formatted << '.';
+    formatted << revision_;
+    return formatted.str();
+}
+
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file Version.h
+/// \brief Defines the Version class.
+//
+// Author: Derek Barnett
+
+#ifndef PACBIOBAM_VERSION_H
+#define PACBIOBAM_VERSION_H
+
+#include <ostream>
+#include <stdexcept>
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief A three-component version number: major, minor, revision.
+///
+/// Constructors and setters call Check(), which rejects negative components.
+/// Versions compare component-wise, major first.
+///
+class Version
+{
+public:
+    // well-known versions
+    static const Version Current;
+    static const Version Minimum;
+
+    // ---- construction / assignment ----
+
+    /// Creates version 0.0.0.
+    constexpr Version();
+
+    /// Creates a version from explicit components.
+    Version(int major, int minor, int revision);
+
+    /// Parses a version; string must be "<major>.<minor>.<revision>".
+    Version(const std::string& v);
+
+    Version(const Version& other) = default;
+    Version(Version&& other) = default;
+    Version& operator=(const Version&) = default;
+    Version& operator=(Version&&) = default;
+    ~Version() = default;
+
+    // ---- comparison ----
+
+    bool operator==(const Version& other) const;
+    bool operator!=(const Version& other) const;
+    bool operator<(const Version& other) const;
+    bool operator<=(const Version& other) const;
+    bool operator>(const Version& other) const;
+    bool operator>=(const Version& other) const;
+
+    // ---- string conversion ----
+
+    std::string ToString() const;
+    operator std::string() const;
+
+    // ---- component access ----
+
+    int Major() const;
+    int Minor() const;
+    int Revision() const;
+
+    // ---- component modification (each returns *this) ----
+
+    Version& Major(int major);
+    Version& Minor(int minor);
+    Version& Revision(int revision);
+
+private:
+    int major_;
+    int minor_;
+    int revision_;
+
+    /// Throws if any component is negative.
+    void Check() const;
+};
+
+/// Streams a version in its ToString() form and returns the stream.
+inline std::ostream& operator<<(std::ostream& out, const Version& version)
+{
+    return out << version.ToString();
+}
+
+/// Default version is 0.0.0.
+inline constexpr Version::Version()
+    : major_(0), minor_(0), revision_(0)
+{
+}
+
+/// Creates a version from explicit components; Check() rejects negatives.
+inline Version::Version(int major, int minor, int revision)
+    : major_(major), minor_(minor), revision_(revision)
+{
+    Check();
+}
+
+/// Two versions are equal when all three components match.
+inline bool Version::operator==(const Version& other) const
+{
+    if (major_ != other.major_)
+        return false;
+    if (minor_ != other.minor_)
+        return false;
+    return revision_ == other.revision_;
+}
+
+/// Negation of operator==.
+inline bool Version::operator!=(const Version& other) const
+{
+    const bool same = (*this == other);
+    return !same;
+}
+
+/// Orders versions by major, then minor, then revision.
+inline bool Version::operator<(const Version& other) const
+{
+    // the first component that differs decides (e.g. 2.* < 3.*)
+    if (major_ != other.major_)
+        return major_ < other.major_;
+
+    // same major (e.g. 3.1.* < 3.2.*)
+    if (minor_ != other.minor_)
+        return minor_ < other.minor_;
+
+    // same major & minor (e.g. 3.2.1 < 3.2.2); equal versions are not less-than
+    return revision_ < other.revision_;
+}
+/// Less-than-or-equal, expressed as "not greater than".
+inline bool Version::operator<=(const Version& other) const
+{
+    const bool greater = (*this > other);
+    return !greater;
+}
+
+/// Greater-than, expressed as operator< with the operands swapped.
+inline bool Version::operator>(const Version& other) const
+{
+    return other < *this;
+}
+
+/// Greater-than-or-equal, expressed as "not less than".
+inline bool Version::operator>=(const Version& other) const
+{
+    const bool less = (*this < other);
+    return !less;
+}
+
+/// Implicit conversion to std::string; yields the same text as ToString().
+inline Version::operator std::string() const
+{
+    return ToString();
+}
+
+/// \brief Verifies that no component is negative.
+/// \throws std::runtime_error if major, minor or revision is below zero
+inline void Version::Check() const
+{
+    const bool allNonNegative = (major_ >= 0) && (minor_ >= 0) && (revision_ >= 0);
+    if (!allNonNegative)
+        throw std::runtime_error("version cannot contain negative numbers");
+}
+
+/// \returns the major component
+inline int Version::Major() const
+{
+    return major_;
+}
+
+/// \brief Sets the major component.
+///
+/// The new value is validated before it is stored, so a rejected value never
+/// ends up in the object (previously the negative value was kept even though
+/// the call threw).
+///
+/// \param[in] major  new major component; must not be negative
+/// \returns reference to this version, to allow chaining
+/// \throws std::runtime_error if \p major is negative, or if Check() rejects
+///         the resulting version
+///
+inline Version& Version::Major(int major)
+{
+    if (major < 0)
+        throw std::runtime_error("version cannot contain negative numbers");
+    major_ = major;
+    Check();
+    return *this;
+}
+
+/// \returns the minor component
+inline int Version::Minor() const
+{
+    return minor_;
+}
+
+/// \brief Sets the minor component.
+///
+/// The new value is validated before it is stored, so a rejected value never
+/// ends up in the object (previously the negative value was kept even though
+/// the call threw).
+///
+/// \param[in] minor  new minor component; must not be negative
+/// \returns reference to this version, to allow chaining
+/// \throws std::runtime_error if \p minor is negative, or if Check() rejects
+///         the resulting version
+///
+inline Version& Version::Minor(int minor)
+{
+    if (minor < 0)
+        throw std::runtime_error("version cannot contain negative numbers");
+    minor_ = minor;
+    Check();
+    return *this;
+}
+
+/// \returns the revision component
+inline int Version::Revision() const
+{
+    return revision_;
+}
+
+/// \brief Sets the revision component.
+///
+/// The new value is validated before it is stored, so a rejected value never
+/// ends up in the object (previously the negative value was kept even though
+/// the call threw).
+///
+/// \param[in] revision  new revision component; must not be negative
+/// \returns reference to this version, to allow chaining
+/// \throws std::runtime_error if \p revision is negative, or if Check()
+///         rejects the resulting version
+///
+inline Version& Version::Revision(int revision)
+{
+    if (revision < 0)
+        throw std::runtime_error("version cannot contain negative numbers");
+    revision_ = revision;
+    Check();
+    return *this;
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // PACBIOBAM_VERSION_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualRegionTypeMap.cpp
+/// \brief Implements the VirtualRegionTypeMap class.
+//
+// Author: Armin Töpfer
+
+#include "pbbam/virtual/VirtualRegionTypeMap.h"
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+/// Lookup table from a single-character region code to the corresponding
+/// VirtualRegionType.
+std::map<char, VirtualRegionType> VirtualRegionTypeMap::ParseChar
+{
+    { 'A', VirtualRegionType::ADAPTER  },
+    { 'B', VirtualRegionType::BARCODE  },
+    { 'F', VirtualRegionType::FILTERED },
+    { 'H', VirtualRegionType::HQREGION },
+    { 'L', VirtualRegionType::LQREGION }
+};
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualZmwBamRecord.cpp
+/// \brief Implements the VirtualZmwBamRecord class.
+//
+// Author: Armin Töpfer
+
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <vector>
+
+#include "pbbam/virtual/VirtualZmwBamRecord.h"
+#include "pbbam/virtual/VirtualRegionType.h"
+#include "pbbam/virtual/VirtualRegionTypeMap.h"
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief Moves the elements of \p src onto the end of \p dst.
+///
+/// When \p dst is empty, \p src is move-assigned to it wholesale. Otherwise
+/// the elements are moved one by one after reserving room for them, and
+/// \p src is cleared afterwards.
+///
+/// \param[in,out] src  source vector; its elements are moved from, so it
+///                     should be treated as drained after the call
+/// \param[in,out] dst  destination vector that is appended to
+///
+template <typename T>
+inline void MoveAppend(std::vector<T>& src, std::vector<T>& dst) noexcept
+{
+    // nothing to keep in dst: take over src in one step
+    if (dst.empty()) {
+        dst = std::move(src);
+        return;
+    }
+
+    dst.reserve(dst.size() + src.size());
+    dst.insert(dst.end(),
+               std::make_move_iterator(src.begin()),
+               std::make_move_iterator(src.end()));
+    src.clear();
+}
+
+/// \brief Moves the elements of a temporary (rvalue) \p src onto the end of
+///        \p dst.
+///
+/// When \p dst is empty, \p src is move-assigned to it wholesale. Otherwise
+/// the elements are moved one by one after reserving room for them, and
+/// \p src is cleared afterwards.
+///
+/// \param[in]     src  source vector, bound by rvalue reference
+/// \param[in,out] dst  destination vector that is appended to
+///
+template <typename T>
+inline void MoveAppend(std::vector<T>&& src, std::vector<T>& dst) noexcept
+{
+    // nothing to keep in dst: take over src in one step
+    if (dst.empty()) {
+        dst = std::move(src);
+        return;
+    }
+
+    dst.reserve(dst.size() + src.size());
+    dst.insert(dst.end(),
+               std::make_move_iterator(src.begin()),
+               std::make_move_iterator(src.end()));
+    src.clear();
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+/// \brief Creates a virtual ZMW record from its source records.
+///
+/// Takes ownership of the source records, orders them by ascending
+/// queryStart, and then stitches them together via StitchSources().
+///
+/// \param[in] unorderedSources  source records, in any order
+/// \param[in] header            header passed on to the BamRecord base
+///
+VirtualZmwBamRecord::VirtualZmwBamRecord(
+    std::vector<BamRecord>&& unorderedSources, const BamHeader& header)
+    : BamRecord(header)
+    , sources_(std::move(unorderedSources))
+{
+    // stitching relies on the sources being ordered by queryStart
+    const auto byQueryStart = [](const BamRecord& lhs, const BamRecord& rhs)
+    {
+        return lhs.QueryStart() < rhs.QueryStart();
+    };
+    std::sort(sources_.begin(), sources_.end(), byQueryStart);
+
+    StitchSources();
+}
+
+/// \returns true if the virtual regions map has an entry for \p regionType
+bool VirtualZmwBamRecord::HasVirtualRegionType(const VirtualRegionType regionType) const
+{
+    const auto found = virtualRegionsMap_.find(regionType);
+    return found != virtualRegionsMap_.end();
+}
+
+/// \brief Returns the record's raw IPD data run through Frames::Decode().
+///
+/// The raw IPD values are first copied into a byte vector (each value is
+/// converted to uint8_t) and that vector is then decoded.
+///
+/// \param[in] orientation  orientation forwarded to IPDRaw()
+/// \returns the decoded frames
+///
+Frames VirtualZmwBamRecord::IPDV1Frames(Orientation orientation) const
+{
+    const auto rawFrames = this->IPDRaw(orientation);
+    const auto& rawValues = rawFrames.Data();
+
+    // Frames::Decode() is fed a vector of bytes, so narrow the raw values first
+    const std::vector<uint8_t> encoded(rawValues.begin(), rawValues.end());
+    return Frames::Decode(encoded);
+}
+
+
+/// \brief Concatenates the data of all source records (already sorted by
+///        query start) into this record and fills the virtual region map.
+///
+/// Sequence, qualities and the per-base/per-pulse tags of every source are
+/// appended in source order. Barcodes, barcode quality, read accuracy and
+/// scrap ZMW type are taken from the first source that carries them.
+/// HQREGION entries are derived from the collected LQREGION entries.
+///
+/// \throws std::runtime_error if there are no source records, if the header
+///         has no read groups, if sources disagree on their scrap ZMW type,
+///         or if a single LQREGION touches neither end of the sequence
+///
+void VirtualZmwBamRecord::StitchSources(void)
+{
+    // Guard: indexing into an empty source list below would be undefined behavior
+    if (sources_.empty())
+        throw std::runtime_error("Cannot stitch virtual ZMW record: no source records");
+
+    const auto& firstRecord = sources_.front();
+    const auto& lastRecord = sources_.back();
+
+    std::string sequence;
+    std::string deletionTag;
+    std::string substitutionTag;
+    std::string alternativeLabelTag;
+    std::string pulseCall;
+
+    QualityValues qualities;
+    QualityValues deletionQv;
+    QualityValues insertionQv;
+    QualityValues mergeQv;
+    QualityValues pulseMergeQv;
+    QualityValues substitutionQv;
+    QualityValues labelQv;
+    QualityValues alternativeLabelQv;
+
+    Frames ipd;
+    Frames pw;
+    Frames pd;
+    Frames px;
+    std::vector<float> pa;
+    std::vector<float> pm;
+    std::vector<uint32_t> sf;
+
+    // initialize capacity: sources are sorted, so first/last span the stitched read
+    const auto stitchedSize = lastRecord.QueryEnd() - firstRecord.QueryStart();
+    sequence.reserve(stitchedSize);
+    deletionTag.reserve(stitchedSize);
+    substitutionTag.reserve(stitchedSize);
+    alternativeLabelTag.reserve(stitchedSize);
+    pulseCall.reserve(stitchedSize);
+    qualities.reserve(stitchedSize);
+    deletionQv.reserve(stitchedSize);
+    insertionQv.reserve(stitchedSize);
+    mergeQv.reserve(stitchedSize);
+    pulseMergeQv.reserve(stitchedSize);
+    substitutionQv.reserve(stitchedSize);
+    labelQv.reserve(stitchedSize);
+    alternativeLabelQv.reserve(stitchedSize);
+    ipd.DataRaw().reserve(stitchedSize);
+    pw.DataRaw().reserve(stitchedSize);
+    pd.DataRaw().reserve(stitchedSize);
+    px.DataRaw().reserve(stitchedSize);
+    pa.reserve(stitchedSize);
+    pm.reserve(stitchedSize);
+    sf.reserve(stitchedSize);
+
+    // Stitch using tmp vars
+    for(auto& b : sources_)
+    {
+        sequence.append(b.Sequence());
+
+        MoveAppend(b.Qualities(), qualities);
+
+        if (b.HasDeletionQV())
+            MoveAppend(std::move(b.DeletionQV()), deletionQv);
+
+        if (b.HasInsertionQV())
+            MoveAppend(std::move(b.InsertionQV()), insertionQv);
+
+        if (b.HasMergeQV())
+            MoveAppend(std::move(b.MergeQV()), mergeQv);
+
+        if (b.HasPulseMergeQV())
+            MoveAppend(std::move(b.PulseMergeQV()), pulseMergeQv);
+
+        if (b.HasSubstitutionQV())
+            MoveAppend(std::move(b.SubstitutionQV()), substitutionQv);
+
+        if (b.HasLabelQV())
+            MoveAppend(std::move(b.LabelQV()), labelQv);
+
+        if (b.HasAltLabelQV())
+            MoveAppend(std::move(b.AltLabelQV()), alternativeLabelQv);
+
+        if (b.HasDeletionTag())
+            deletionTag.append(std::move(b.DeletionTag()));
+
+        if (b.HasSubstitutionTag())
+            substitutionTag.append(std::move(b.SubstitutionTag()));
+
+        if (b.HasAltLabelTag())
+            alternativeLabelTag.append(std::move(b.AltLabelTag()));
+
+        if (b.HasPulseCall())
+            pulseCall.append(std::move(b.PulseCall()));
+
+        if (b.HasIPD())
+            MoveAppend(b.IPDRaw().DataRaw(), ipd.DataRaw());
+
+        if (b.HasPulseWidth())
+            MoveAppend(b.PulseWidthRaw().DataRaw(), pw.DataRaw());
+
+        if (b.HasPulseCallWidth())
+            MoveAppend(b.PulseCallWidth().DataRaw(), px.DataRaw());
+
+        if (b.HasPrePulseFrames())
+            MoveAppend(b.PrePulseFrames().DataRaw(), pd.DataRaw());
+
+        if (b.HasPkmid())
+            MoveAppend(b.Pkmid(), pm);
+
+        if (b.HasPkmean())
+            MoveAppend(b.Pkmean(), pa);
+
+        // NOTE(review): Pkmid2/Pkmean2 land in the same buffers as Pkmid/Pkmean
+        // and are written back through Pkmid()/Pkmean() only - confirm intended.
+        if (b.HasPkmid2())
+            MoveAppend(b.Pkmid2(), pm);
+
+        if (b.HasPkmean2())
+            MoveAppend(b.Pkmean2(), pa);
+
+        if (b.HasStartFrame())
+            MoveAppend(b.StartFrame(), sf);
+
+        // scraps: one virtual region of the record's own scrap region type
+        if (b.HasScrapRegionType())
+        {
+            const VirtualRegionType regionType = b.ScrapRegionType();
+
+            // operator[] creates the (empty) table on first use
+            virtualRegionsMap_[regionType].emplace_back(
+                regionType, b.QueryStart(), b.QueryEnd());
+        }
+
+        // records with local context flags are registered as SUBREAD regions
+        if (b.HasLocalContextFlags())
+        {
+            std::pair<int, int> barcodes{-1, -1};
+            if (b.HasBarcodes())
+                barcodes = b.Barcodes();
+
+            constexpr auto regionType = VirtualRegionType::SUBREAD;
+            virtualRegionsMap_[regionType].emplace_back(
+                regionType, b.QueryStart(), b.QueryEnd(), b.LocalContextFlags(),
+                barcodes.first, barcodes.second);
+        }
+
+        // scalar tags: the first source that provides a value wins
+        if (b.HasBarcodes() && !this->HasBarcodes())
+            this->Barcodes(b.Barcodes());
+
+        if (b.HasBarcodeQuality() && !this->HasBarcodeQuality())
+            this->BarcodeQuality(b.BarcodeQuality());
+
+        if (b.HasReadAccuracy() && !this->HasReadAccuracy())
+            this->ReadAccuracy(b.ReadAccuracy());
+
+        if (b.HasScrapZmwType())
+        {
+            if (!this->HasScrapZmwType())
+                this->ScrapZmwType(b.ScrapZmwType());
+            else if (this->ScrapZmwType() != b.ScrapZmwType())
+                throw std::runtime_error("ScrapZmwTypes do not match");
+        }
+    }
+
+    // ReadGroup: the stitched record uses the first read group of its header
+    const auto readGroups = this->header_.ReadGroups();
+    if (readGroups.empty())
+        throw std::runtime_error("Cannot stitch virtual ZMW record: header has no read groups");
+    this->ReadGroup(readGroups[0]);
+
+    this->NumPasses(1);
+
+    // All records should contain the same SNR and hole number
+    if (firstRecord.HasSignalToNoise())
+        this->SignalToNoise(firstRecord.SignalToNoise());
+    this->HoleNumber(firstRecord.HoleNumber());
+
+    // Query interval spans from the first source's start to the last source's end
+    this->QueryStart(firstRecord.QueryStart());
+    this->QueryEnd(lastRecord.QueryEnd());
+    this->UpdateName();
+
+    // Qualities are only stored when there is exactly one per base
+    std::string qualitiesStr = qualities.Fastq();
+    if (sequence.size() == qualitiesStr.size())
+        this->Impl().SetSequenceAndQualities(sequence, qualitiesStr);
+    else
+        this->Impl().SetSequenceAndQualities(sequence);
+
+    // Tags as strings
+    if (!deletionTag.empty())
+        this->DeletionTag(deletionTag);
+    if (!substitutionTag.empty())
+        this->SubstitutionTag(substitutionTag);
+    if (!alternativeLabelTag.empty())
+        this->AltLabelTag(alternativeLabelTag);
+    if (!pulseCall.empty())
+        this->PulseCall(pulseCall);
+
+    // QVs
+    if (!deletionQv.empty())
+        this->DeletionQV(deletionQv);
+    if (!insertionQv.empty())
+        this->InsertionQV(insertionQv);
+    if (!mergeQv.empty())
+        this->MergeQV(mergeQv);
+    if (!pulseMergeQv.empty())
+        this->PulseMergeQV(pulseMergeQv);
+    if (!substitutionQv.empty())
+        this->SubstitutionQV(substitutionQv);
+    if (!labelQv.empty())
+        this->LabelQV(labelQv);
+    if (!alternativeLabelQv.empty())
+        this->AltLabelQV(alternativeLabelQv);
+
+    // 16 bit arrays
+    if (!ipd.Data().empty())
+        this->IPD(ipd, FrameEncodingType::LOSSLESS);
+    if (!pw.Data().empty())
+        this->PulseWidth(pw, FrameEncodingType::LOSSLESS);
+    if (!pa.empty())
+        this->Pkmean(pa);
+    if (!pm.empty())
+        this->Pkmid(pm);
+    if (!pd.Data().empty())
+        this->PrePulseFrames(pd, FrameEncodingType::LOSSLESS);
+    if (!px.Data().empty())
+        this->PulseCallWidth(px, FrameEncodingType::LOSSLESS);
+
+    // 32 bit arrays
+    if (!sf.empty())
+        this->StartFrame(sf);
+
+    // Determine HQREGIONs based on LQREGIONs
+    if (HasVirtualRegionType(VirtualRegionType::LQREGION))
+    {
+        // references into a std::map stay valid while HQREGION entries are added
+        const auto& lqRegions = virtualRegionsMap_[VirtualRegionType::LQREGION];
+        if (lqRegions.size() == 1)
+        {
+            // a single LQ region must sit at one end; the HQ region is the rest
+            const auto& lq = lqRegions[0];
+            if (lq.beginPos == 0)
+                virtualRegionsMap_[VirtualRegionType::HQREGION].emplace_back(
+                    VirtualRegionType::HQREGION, lq.endPos, sequence.size());
+            else if (lq.endPos == static_cast<int>(sequence.size()))
+                virtualRegionsMap_[VirtualRegionType::HQREGION].emplace_back(
+                    VirtualRegionType::HQREGION, 0, lq.beginPos);
+            else
+                throw std::runtime_error("Unknown HQREGION");
+        }
+        else
+        {
+            // every non-empty gap in front of an LQ region becomes an HQ region
+            int beginPos = 0;
+            for (const auto& lqregion : lqRegions)
+            {
+                if (lqregion.beginPos - beginPos > 0)
+                    virtualRegionsMap_[VirtualRegionType::HQREGION].emplace_back(
+                        VirtualRegionType::HQREGION, beginPos, lqregion.beginPos);
+                beginPos = lqregion.endPos;
+            }
+        }
+    }
+    else
+    {
+        // no LQ regions at all: the whole stitched sequence is high quality
+        virtualRegionsMap_[VirtualRegionType::HQREGION].emplace_back(
+            VirtualRegionType::HQREGION, 0, sequence.size());
+    }
+}
+
+
+std::map<VirtualRegionType, std::vector<VirtualRegion>>
+VirtualZmwBamRecord::VirtualRegionsMap(void) const
+{
+    // returned by value: the caller receives a copy of the region map
+    return virtualRegionsMap_;
+}
+
+std::vector<VirtualRegion>
+VirtualZmwBamRecord::VirtualRegionsTable(const VirtualRegionType regionType) const
+{
+    const auto found = virtualRegionsMap_.find(regionType);
+
+    // unknown region type: answer with an empty table
+    if (found == virtualRegionsMap_.cend())
+        return std::vector<VirtualRegion>();
+
+    return found->second;
+}
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualZmwCompositeReader.cpp
+/// \brief Implements the VirtualZmwCompositeReader class.
+//
+// Author: Derek Barnett
+
+#include "VirtualZmwCompositeReader.h"
+#include <boost/algorithm/string.hpp>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+VirtualZmwCompositeReader::VirtualZmwCompositeReader(const DataSet& dataset)
+    : currentReader_(nullptr)
+    , filter_(PbiFilter::FromDataSet(dataset))
+{
+    // Build the queue of (primary BAM, scraps BAM) filename pairs
+    const ExternalResources& resources = dataset.ExternalResources();
+    for (const ExternalResource& resource : resources) {
+
+        // only subread / hqregion BAM resources can act as the "primary" file
+        const auto& metatype = resource.MetaType();
+        const bool isPrimary = (metatype == "PacBio.SubreadFile.SubreadBamFile" ||
+                                metatype == "PacBio.SubreadFile.HqRegionBamFile");
+        if (!isPrimary)
+            continue;
+
+        // path is resolved through the dataset (it may be relative)
+        const string primaryFn = dataset.ResolvePath(resource.ResourceId());
+
+        // the first child resource with a scraps metatype is the companion file
+        string scrapsFn;
+        const ExternalResources& childResources = resource.ExternalResources();
+        for (const ExternalResource& childResource : childResources) {
+            const auto& childMetatype = childResource.MetaType();
+            const bool isScraps = (childMetatype == "PacBio.SubreadFile.ScrapsBamFile" ||
+                                   childMetatype == "PacBio.SubreadFile.HqScrapsBamFile");
+            if (isScraps) {
+                scrapsFn = dataset.ResolvePath(childResource.ResourceId());
+                break;
+            }
+        }
+
+        // a source is only usable when both filenames are known
+        if (!primaryFn.empty() && !scrapsFn.empty())
+            sources_.push_back(make_pair(primaryFn, scrapsFn));
+    }
+
+    // start reading from the first queued source that has data
+    OpenNextReader();
+}
+
+bool VirtualZmwCompositeReader::HasNext(void)
+{
+    // without an open reader there is nothing left to deliver
+    if (!currentReader_)
+        return false;
+
+    return currentReader_->HasNext();
+}
+
+/// \returns the next stitched record of the current reader; once that reader
+///          is drained, the next queued source is opened.
+/// \throws std::runtime_error if no reader is active
+VirtualZmwBamRecord VirtualZmwCompositeReader::Next(void)
+{
+    // no reader active
+    if (!currentReader_) {
+        const string msg = { "no readers active, make sure you use "
+                             "VirtualZmwCompositeReader::HasNext before "
+                             "requesting next record"
+                           };
+        throw std::runtime_error(msg);
+    }
+
+    // deliberately non-const: a const local cannot be moved out on return
+    auto result = currentReader_->Next();
+
+    // current source drained - switch to the next one before handing out the record
+    if (!currentReader_->HasNext())
+        OpenNextReader();
+    return result;
+}
+
+/// \returns the next group of unstitched records of the current reader; once
+///          that reader is drained, the next queued source is opened.
+/// \throws std::runtime_error if no reader is active
+vector<BamRecord> VirtualZmwCompositeReader::NextRaw(void)
+{
+    // no reader active
+    if (!currentReader_) {
+        const string msg = { "no readers active, make sure you use "
+                             "VirtualZmwCompositeReader::HasNext before "
+                             "requesting next group of records"
+                           };
+        throw std::runtime_error(msg);
+    }
+
+    // deliberately non-const: a const local cannot be moved out on return
+    auto result = currentReader_->NextRaw();
+
+    // current source drained - switch to the next one before handing out the records
+    if (!currentReader_->HasNext())
+        OpenNextReader();
+    return result;
+}
+
+/// Drops the current reader and opens queued sources, front to back, until one
+/// reports data. If no queued source has data, the reader of the last source
+/// tried (if any) stays in place with nothing left to read.
+void VirtualZmwCompositeReader::OpenNextReader(void)
+{
+    currentReader_.reset();
+
+    bool foundData = false;
+    while (!foundData && !sources_.empty()) {
+        // take the pair off the queue before opening it
+        const auto filenames = sources_.front();
+        sources_.pop_front();
+
+        currentReader_.reset(new VirtualZmwReader(filenames.first,
+                                                  filenames.second,
+                                                  filter_));
+        foundData = currentReader_->HasNext();
+    }
+}
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualZmwCompositeReader.h
+/// \brief Defines the VirtualZmwCompositeReader class.
+//
+// Author: Derek Barnett
+
+#ifndef VIRTUALZMWCOMPOSITEREADER_H
+#define VIRTUALZMWCOMPOSITEREADER_H
+
+#include "pbbam/DataSet.h"
+#include "pbbam/PbiFilter.h"
+#include "VirtualZmwReader.h"
+#include <deque>
+#include <memory>
+#include <string>
+#include <utility>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief The VirtualZmwCompositeReader provides an interface for
+/// re-stitching "virtual" polymerase reads from their constituent parts,
+/// across multiple %BAM resources from a DataSet.
+///
+/// This class is essentially a DataSet-aware wrapper around
+/// VirtualZmwReader, enabling multiple resources as input. See that
+/// class's documentation for more info.
+///
+class PBBAM_EXPORT VirtualZmwCompositeReader
+{
+public:
+ /// \name Constructors & Related Methods
+ /// \{
+ /// \brief Queues the primary/scraps %BAM pairs listed in \p dataset and opens the first pair that has data.
+ VirtualZmwCompositeReader(const DataSet& dataset);
+
+ VirtualZmwCompositeReader(void) = delete;
+ VirtualZmwCompositeReader(const VirtualZmwCompositeReader&) = delete;
+ VirtualZmwCompositeReader(VirtualZmwCompositeReader&&) = delete;
+ VirtualZmwCompositeReader& operator=(const VirtualZmwCompositeReader&) = delete;
+ VirtualZmwCompositeReader& operator=(VirtualZmwCompositeReader&&) = delete;
+ ~VirtualZmwCompositeReader(void) = default;
+
+ /// \}
+
+public:
+ /// \name Stitched Record Reading
+ /// \{
+
+ /// \returns true if more ZMWs/files are available for reading.
+ bool HasNext(void);
+
+ /// \returns the next stitched polymerase read (throws std::runtime_error if no reader is active)
+ VirtualZmwBamRecord Next(void);
+
+ /// \returns the next set of reads that belong to one ZMW from one %BAM
+ /// resource (a primary %BAM and/or its scraps file). This enables
+ /// stitching records in a distinct thread.
+ /// Throws std::runtime_error if no reader is active.
+ std::vector<BamRecord> NextRaw(void);
+
+ /// \}
+
+private:
+ std::deque< std::pair<std::string, std::string> > sources_; ///< pending (primary, scraps) filename pairs, consumed front to back
+ std::unique_ptr<VirtualZmwReader> currentReader_; ///< reader for the pair currently being consumed; null when none is open
+ PbiFilter filter_; ///< filter built from the DataSet, handed to every VirtualZmwReader
+
+private:
+ void OpenNextReader(void); ///< replaces currentReader_ with a reader for the next queued pair that has data
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // VIRTUALZMWCOMPOSITEREADER_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualZmwReader.cpp
+/// \brief Implements the VirtualZmwReader class.
+//
+// Author: Armin Töpfer
+
+#include <stdexcept>
+
+#include "VirtualZmwReader.h"
+#include "pbbam/ReadGroupInfo.h"
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// Unfiltered variant: delegates to the filtering constructor with an empty filter
+VirtualZmwReader::VirtualZmwReader(const std::string& primaryBamFilepath,
+                                   const std::string& scrapsBamFilepath)
+    : VirtualZmwReader(primaryBamFilepath,
+                       scrapsBamFilepath,
+                       PbiFilter{})
+{
+}
+
+VirtualZmwReader::VirtualZmwReader(const std::string& primaryBamFilepath,
+                                   const std::string& scrapsBamFilepath,
+                                   const PbiFilter& filter)
+{
+    // open both input files
+    primaryBamFile_.reset(new BamFile{ primaryBamFilepath });
+    scrapsBamFile_.reset(new BamFile{ scrapsBamFilepath });
+
+    // an empty filter reads the entire files, otherwise a filtered query is used
+    const bool readEverything = filter.IsEmpty();
+    if (readEverything) {
+        primaryQuery_.reset(new EntireFileQuery(*primaryBamFile_));
+        scrapsQuery_.reset(new EntireFileQuery(*scrapsBamFile_));
+    }
+    else {
+        primaryQuery_.reset(new PbiFilterQuery{ filter, *primaryBamFile_ });
+        scrapsQuery_.reset(new PbiFilterQuery{ filter, *scrapsBamFile_ });
+    }
+
+    // position both iterators on the first record of their query
+    primaryIt_ = (primaryQuery_->begin());
+    scrapsIt_ = (scrapsQuery_->begin());
+
+    // the stitched header starts out as a copy of the primary file's header
+    stitchedHeader_.reset(new BamHeader{ primaryBamFile_->Header().ToSam() });
+
+    // rewrite the first read group as the POLYMERASE group and keep only that one
+    auto groups = stitchedHeader_->ReadGroups();
+    if (groups.empty())
+        throw std::runtime_error("Bam header of the primary bam has no read groups.");
+
+    auto& polymeraseGroup = groups.front();
+    polymeraseGroup.ReadType("POLYMERASE");
+    polymeraseGroup.Id(polymeraseGroup.MovieName(), "POLYMERASE");
+
+    if (groups.size() > 1)
+    {
+        // polymeraseGroup is not used again after the vector is replaced
+        std::vector<ReadGroupInfo> onlyFirst;
+        onlyFirst.emplace_back(std::move(polymeraseGroup));
+        groups = std::move(onlyFirst);
+        stitchedHeader_->ClearReadGroups();
+    }
+    stitchedHeader_->ReadGroups(groups);
+}
+
+// Nothing to release by hand: every member cleans up after itself
+VirtualZmwReader::~VirtualZmwReader(void) = default;
+
+bool VirtualZmwReader::HasNext(void)
+{
+    // Only finished once BOTH queries have been read to their end
+    const bool primaryDone = (primaryIt_ == primaryQuery_->end());
+    const bool scrapsDone = (scrapsIt_ == scrapsQuery_->end());
+    return !(primaryDone && scrapsDone);
+}
+
+// This method is not thread safe
+VirtualZmwBamRecord VirtualZmwReader::Next(void)
+{
+    // gather the records of the next ZMW, then stitch them under the stitched header
+    auto zmwRecords = NextRaw();
+    return VirtualZmwBamRecord{ std::move(zmwRecords), *stitchedHeader_ };
+}
+
+/// \returns all records of the next ZMW: first those from the primary query,
+///          then those from the scraps query.
+/// \throws std::runtime_error if both queries are already exhausted
+std::vector<BamRecord> VirtualZmwReader::NextRaw(void)
+{
+    // Guard: with both queries exhausted there is no record to read a hole
+    // number from, and dereferencing an end iterator is undefined behavior.
+    if (!HasNext())
+        throw std::runtime_error("no more ZMWs available, make sure you use "
+                                 "VirtualZmwReader::HasNext before "
+                                 "requesting next group of records");
+
+    std::vector<BamRecord> bamRecordVec;
+
+    // Current hole number, the smallest of scraps and primary.
+    // It can be that the next ZMW is scrap only.
+    int currentHoleNumber;
+    if (primaryIt_ == primaryQuery_->end())
+        currentHoleNumber = (*scrapsIt_).HoleNumber();
+    else if (scrapsIt_ == scrapsQuery_->end())
+        currentHoleNumber = (*primaryIt_).HoleNumber();
+    else
+        currentHoleNumber = std::min((*primaryIt_).HoleNumber(),
+                                     (*scrapsIt_).HoleNumber());
+
+    // collect subreads or hqregions
+    while (primaryIt_ != primaryQuery_->end() &&
+           currentHoleNumber == (*primaryIt_).HoleNumber())
+    {
+        bamRecordVec.push_back(*primaryIt_++);
+    }
+
+    // collect scraps
+    while (scrapsIt_ != scrapsQuery_->end() &&
+           currentHoleNumber == (*scrapsIt_).HoleNumber())
+    {
+        bamRecordVec.push_back(*scrapsIt_++);
+    }
+
+    return bamRecordVec;
+}
+
+BamHeader VirtualZmwReader::PrimaryHeader(void) const
+{
+    // header as stored in the primary BAM file
+    return primaryBamFile_->Header();
+}
+
+BamHeader VirtualZmwReader::ScrapsHeader(void) const
+{
+    // header as stored in the scraps BAM file
+    return scrapsBamFile_->Header();
+}
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file VirtualZmwReader.h
+/// \brief Defines the VirtualZmwReader class.
+//
+// Author: Armin Töpfer
+
+#ifndef VIRTUALZMWREADER_H
+#define VIRTUALZMWREADER_H
+
+#include <memory>
+
+#include "pbbam/BamFile.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/Config.h"
+#include "pbbam/EntireFileQuery.h"
+#include "pbbam/PbiFilter.h"
+#include "pbbam/PbiFilterQuery.h"
+#include "pbbam/virtual/VirtualZmwBamRecord.h"
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+class VirtualZmwReader
+{
+public:
+ /// \brief Creates a reader that will operate on a primary %BAM file (e.g.
+ /// subread data) and a scraps file, consuming all reads.
+ ///
+ /// \param[in] primaryBamFilepath hqregion.bam or subreads.bam file path
+ /// \param[in] scrapsBamFilepath scraps.bam file path
+ ///
+ VirtualZmwReader(const std::string& primaryBamFilepath,
+ const std::string& scrapsBamFilepath);
+
+ /// \brief Creates a reader that will operate on a primary %BAM file (e.g.
+ /// subread data) and a scraps file, respecting the provided PBI
+ /// filter.
+ ///
+ /// \note All %BAM files must have a corresponding ".pbi" index file to use
+ /// the filter. You may need to call BamFile::EnsurePacBioIndexExists
+ /// before constructing the reader.
+ ///
+ /// \param[in] primaryBamFilepath hqregion.bam or subreads.bam file path
+ /// \param[in] scrapsBamFilepath scraps.bam file path
+ /// \param[in] filter PBI filter criteria
+ ///
+ VirtualZmwReader(const std::string& primaryBamFilepath,
+ const std::string& scrapsBamFilepath,
+ const PbiFilter& filter);
+
+ VirtualZmwReader(void) = delete;
+ VirtualZmwReader(const VirtualZmwReader&) = delete;
+ VirtualZmwReader(VirtualZmwReader&&) = delete;
+ VirtualZmwReader& operator=(const VirtualZmwReader&) = delete;
+ VirtualZmwReader& operator=(VirtualZmwReader&&) = delete;
+ ~VirtualZmwReader(void);
+
+public:
+
+ /// \returns the BamHeader associated with this reader's "primary" %BAM file
+ BamHeader PrimaryHeader(void) const;
+
+ /// \returns the BamHeader associated with this reader's "scraps" %BAM file
+ BamHeader ScrapsHeader(void) const;
+
+public:
+
+ /// \returns true if more ZMWs are available for reading.
+ bool HasNext(void);
+
+ /// \returns the next stitched polymerase read (this method is not thread safe)
+ VirtualZmwBamRecord Next(void);
+
+ /// \returns the next set of reads that belong to one ZMW.
+ /// This enables stitching records in a distinct thread.
+ /// Records from the primary file come first, followed by the scraps records.
+ std::vector<BamRecord> NextRaw(void);
+
+private:
+ std::unique_ptr<BamFile> primaryBamFile_; ///< primary (subreads or hqregion) %BAM file
+ std::unique_ptr<BamFile> scrapsBamFile_; ///< scraps %BAM file
+ std::unique_ptr<internal::IQuery> primaryQuery_; ///< EntireFileQuery or PbiFilterQuery over the primary file
+ std::unique_ptr<internal::IQuery> scrapsQuery_; ///< EntireFileQuery or PbiFilterQuery over the scraps file
+ internal::IQuery::iterator primaryIt_; ///< current read position within primaryQuery_
+ internal::IQuery::iterator scrapsIt_; ///< current read position within scrapsQuery_
+ std::unique_ptr<BamHeader> stitchedHeader_; ///< copy of the primary header, reduced to one POLYMERASE read group
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // VIRTUALZMWREADER_H
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file WhitelistedZmwReadStitcher.cpp
+/// \brief Implements the WhitelistedZmwReadStitcher class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/virtual/WhitelistedZmwReadStitcher.h"
+#include "pbbam/PbiIndexedBamReader.h"
+#include "VirtualZmwReader.h"
+#include <cassert>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+
+/// Private implementation of WhitelistedZmwReadStitcher: stitches only the
+/// whitelisted ZMWs that occur in the primary or the scraps %BAM file.
+struct WhitelistedZmwReadStitcher::WhitelistedZmwReadStitcherPrivate
+{
+public:
+    /// Opens both files with indexed readers, prepares the header used for
+    /// stitched records and reduces the whitelist to ZMWs present in the input.
+    ///
+    /// \throws std::runtime_error if the primary header has no read groups
+    WhitelistedZmwReadStitcherPrivate(const vector<int32_t>& zmwWhitelist,
+                                      const string& primaryBamFilePath,
+                                      const string& scrapsBamFilePath)
+        : primaryBamFile_(new BamFile{ primaryBamFilePath })
+        , scrapsBamFile_(new BamFile{ scrapsBamFilePath })
+        , primaryReader_(new PbiIndexedBamReader{ *primaryBamFile_ })
+        , scrapsReader_(new PbiIndexedBamReader{ *scrapsBamFile_ })
+    {
+        // setup new header for stitched data
+        polyHeader_ = unique_ptr<BamHeader>(new BamHeader(primaryBamFile_->Header().ToSam()));
+        auto readGroups = polyHeader_->ReadGroups();
+        if (readGroups.empty())
+            throw runtime_error("Bam header of the primary bam has no read groups.");
+        readGroups[0].ReadType("POLYMERASE");
+        readGroups[0].Id(readGroups[0].MovieName(), "POLYMERASE");
+        if (readGroups.size() > 1)
+        {
+            // keep only the first (now POLYMERASE) read group
+            vector<ReadGroupInfo> singleGroup;
+            singleGroup.emplace_back(move(readGroups[0]));
+            readGroups = move(singleGroup);
+            polyHeader_->ClearReadGroups();
+        }
+        polyHeader_->ReadGroups(readGroups);
+
+        // remove ZMWs up front, that are not found in either file
+        PreFilterZmws(zmwWhitelist);
+    }
+
+    /// \returns true while whitelisted ZMWs remain to be read
+    bool HasNext(void) const
+    {
+        return !zmwWhitelist_.empty();
+    }
+
+    /// \returns the stitched record of the next whitelisted ZMW
+    /// \throws std::runtime_error if there are no records left to stitch
+    VirtualZmwBamRecord Next(void)
+    {
+        auto bamRecordVec = NextRaw();
+
+        // NextRaw() yields nothing once the whitelist is exhausted; a stitched
+        // record cannot be built from zero sources, so report the misuse.
+        if (bamRecordVec.empty())
+            throw runtime_error("no ZMWs left to stitch, make sure you use "
+                                "WhitelistedZmwReadStitcher::HasNext before "
+                                "requesting next record");
+
+        VirtualZmwBamRecord stitched(move(bamRecordVec), *polyHeader_);
+        return stitched;
+    }
+
+    /// \returns the records of the next whitelisted ZMW (primary records
+    ///          first, then scraps), or an empty vector if none remain
+    vector<BamRecord> NextRaw(void)
+    {
+        auto result = vector<BamRecord>{ };
+        if (!HasNext())
+            return result;
+
+        // restrict both readers to the ZMW at the front of the queue
+        const auto& zmw = zmwWhitelist_.front();
+        primaryReader_->Filter(PbiZmwFilter{zmw});
+        scrapsReader_->Filter(PbiZmwFilter{zmw});
+
+        auto record = BamRecord{ };
+        while (primaryReader_->GetNext(record))
+            result.push_back(record);
+        while (scrapsReader_->GetNext(record))
+            result.push_back(record);
+
+        // zmw refers to the front element, so pop only after its last use
+        zmwWhitelist_.pop_front();
+        return result;
+    }
+
+    /// \returns the header of the primary %BAM file
+    BamHeader PrimaryHeader(void) const
+    { return primaryBamFile_->Header(); }
+
+    /// \returns the header of the scraps %BAM file
+    BamHeader ScrapsHeader(void) const
+    { return scrapsBamFile_->Header(); }
+
+private:
+    unique_ptr<BamFile> primaryBamFile_;
+    unique_ptr<BamFile> scrapsBamFile_;
+    unique_ptr<PbiIndexedBamReader> primaryReader_;
+    unique_ptr<PbiIndexedBamReader> scrapsReader_;
+    unique_ptr<BamHeader> polyHeader_;    // header attached to stitched records
+    deque<int32_t> zmwWhitelist_;         // remaining ZMWs, in requested order
+
+private:
+    /// Copies into zmwWhitelist_ every requested ZMW that occurs in the PBI
+    /// index of the primary or the scraps file, preserving the request order.
+    void PreFilterZmws(const vector<int32_t>& zmwWhitelist)
+    {
+        // fetch input ZMWs
+        const PbiRawData primaryIndex(primaryBamFile_->PacBioIndexFilename());
+        const PbiRawData scrapsIndex(scrapsBamFile_->PacBioIndexFilename());
+        const auto& primaryZmws = primaryIndex.BasicData().holeNumber_;
+        const auto& scrapsZmws = scrapsIndex.BasicData().holeNumber_;
+
+        // toss them all into a set (for uniqueness & lookup here soon)
+        set<int32_t> inputZmws;
+        inputZmws.insert(primaryZmws.begin(), primaryZmws.end());
+        inputZmws.insert(scrapsZmws.begin(), scrapsZmws.end());
+
+        // check our requested whitelist against files' ZMWs, keep if found
+        for (const int32_t zmw : zmwWhitelist) {
+            if (inputZmws.count(zmw) != 0)
+                zmwWhitelist_.push_back(zmw);
+        }
+    }
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+// --------------------------------
+// WhitelistedZmwReadStitcher implementation
+// --------------------------------
+
+// All work happens in the private implementation object
+WhitelistedZmwReadStitcher::WhitelistedZmwReadStitcher(const vector<int32_t>& zmwWhitelist,
+                                                       const string& primaryBamFilePath,
+                                                       const string& scrapsBamFilePath)
+    : d_(new WhitelistedZmwReadStitcherPrivate(
+          zmwWhitelist, primaryBamFilePath, scrapsBamFilePath))
+{
+}
+
+// Defined here, where the private implementation type is complete
+WhitelistedZmwReadStitcher::~WhitelistedZmwReadStitcher(void) = default;
+
+bool WhitelistedZmwReadStitcher::HasNext(void) const
+{
+    // forwarded to the private implementation
+    return d_->HasNext();
+}
+
+VirtualZmwBamRecord WhitelistedZmwReadStitcher::Next(void)
+{
+    // forwarded to the private implementation
+    return d_->Next();
+}
+
+vector<BamRecord> WhitelistedZmwReadStitcher::NextRaw(void)
+{
+    // forwarded to the private implementation
+    return d_->NextRaw();
+}
+
+BamHeader WhitelistedZmwReadStitcher::PrimaryHeader(void) const
+{
+    // forwarded to the private implementation
+    return d_->PrimaryHeader();
+}
+
+BamHeader WhitelistedZmwReadStitcher::ScrapsHeader(void) const
+{
+    // forwarded to the private implementation
+    return d_->ScrapsHeader();
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "XmlReader.h"
+#include "StringUtils.h"
+#include "pugixml/pugixml.hpp"
+#include <iostream>
+#include <fstream>
+#include <memory>
+#include <vector>
+#include <cassert>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// Records one XML namespace declaration in \p registry.
+///
+/// \param[in]     attributeName   attribute name, split on ':' into at most
+///                                two parts (e.g. "xmlns" or "xmlns:pbds")
+/// \param[in]     attributeValue  namespace URI carried by the attribute
+/// \param[in,out] registry        registry to update
+///
+/// A name without a ':' sets the registry's default XSD; a two-part name
+/// registers its second part as the namespace name for the URI's XSD type.
+///
+/// \throws std::runtime_error if the name has more than two ':'-separated parts
+///
+static
+void UpdateRegistry(const string& attributeName,
+                    const string& attributeValue,
+                    NamespaceRegistry& registry)
+{
+    const vector<string> tokens = Split(attributeName, ':');
+    assert(!tokens.empty());
+    if (tokens.size() > 2)
+        throw std::runtime_error("malformed xmlns attribute: " + attributeName);
+
+    const XsdType& xsd = registry.XsdForUri(attributeValue);
+
+    // no ':' in the attribute name -> default namespace
+    if (tokens.size() == 1) {
+        registry.SetDefaultXsd(xsd);
+        return;
+    }
+
+    // "<first>:<name>" -> named namespace
+    assert(tokens.size() == 2);
+    NamespaceInfo info(tokens.at(1), attributeValue);
+    registry.Register(xsd, info);
+}
+
+/// Recursively converts \p xmlNode (and its subtree) into a DataSetElement
+/// and appends it to \p parent.
+///
+/// Nodes with an empty name are skipped entirely: pugixml exposes more node
+/// kinds than this reader uses.
+///
+static
+void FromXml(const pugi::xml_node& xmlNode, DataSetElement& parent)
+{
+    // skip unnamed XML nodes
+    const string nodeName = xmlNode.name();
+    if (nodeName.empty())
+        return;
+
+    // element label & text
+    DataSetElement element(xmlNode.name(), FromInputXml());
+    element.Text(xmlNode.text().get());
+
+    // copy attributes
+    const auto lastAttribute = xmlNode.attributes_end();
+    auto attribute = xmlNode.attributes_begin();
+    while (attribute != lastAttribute) {
+        element.Attribute(attribute->name(), attribute->value());
+        ++attribute;
+    }
+
+    // build each child's subtree beneath our new element
+    for (const pugi::xml_node& childNode : xmlNode)
+        FromXml(childNode, element);
+
+    // attach the finished element to its parent
+    parent.AddChild(element);
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+/// Parses a DataSet XML document from \p in.
+///
+/// The root element's name becomes the dataset label and its attributes are
+/// copied onto the dataset. Namespace declarations among them ("xmlns" or
+/// "xmlns:<prefix>") are additionally recorded in the dataset's namespace
+/// registry. All child elements are converted recursively.
+///
+/// \param[in] in  stream providing the XML document
+/// \returns the newly created dataset
+///
+/// \throws std::runtime_error if the XML cannot be parsed, has no root
+///         element, or contains a malformed xmlns attribute
+///
+std::unique_ptr<DataSetBase> XmlReader::FromStream(istream& in)
+{
+    pugi::xml_document doc;
+    const pugi::xml_parse_result& loadResult = doc.load(in);
+    if (loadResult.status != pugi::status_ok)
+        throw std::runtime_error(string("could not read XML file, error code:") + to_string(loadResult.status) );
+
+    // parse top-level attributes
+    pugi::xml_node rootNode = doc.document_element();
+    if (rootNode == pugi::xml_node())
+        throw std::runtime_error("could not fetch XML root node");
+
+    // create dataset matching type strings
+    std::unique_ptr<DataSetBase> dataset(new DataSetBase);
+    dataset->Label(rootNode.name());
+
+    // iterate attributes, capture namespace info
+    //
+    // A namespace declaration is named exactly "xmlns" (default namespace) or
+    // starts with "xmlns:" (prefixed namespace). Matching on a bare "xmlns"
+    // prefix would also catch unrelated attributes such as "xmlnsFoo" and let
+    // them overwrite the default XSD.
+    //
+    const string xmlnsName("xmlns");
+    const string xmlnsPrefix("xmlns:");
+    auto attrIter = rootNode.attributes_begin();
+    auto attrEnd  = rootNode.attributes_end();
+    for ( ; attrIter != attrEnd; ++attrIter ) {
+        const string name  = attrIter->name();
+        const string value = attrIter->value();
+        dataset->Attribute(name, value);
+
+        const bool isNamespaceDeclaration =
+            (name == xmlnsName) ||
+            (name.compare(0, xmlnsPrefix.size(), xmlnsPrefix) == 0);
+        if (isNamespaceDeclaration)
+            UpdateRegistry(name, value, dataset->Namespaces());
+    }
+
+    // iterate children, recursively building up subtree
+    auto childIter = rootNode.begin();
+    auto childEnd  = rootNode.end();
+    for ( ; childIter != childEnd; ++childIter ) {
+        pugi::xml_node childNode = *childIter;
+        internal::FromXml(childNode, *dataset);
+    }
+
+    return dataset;
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef XMLREADER_H
+#define XMLREADER_H
+
+#include "pbbam/DataSet.h"
+#include <iosfwd>
+#include <memory>
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// \brief Internal helper that builds a dataset object from XML input.
+///
+/// Exposes a single static entry point; no instance state is declared.
+///
+class XmlReader
+{
+public:
+ /// Reads an XML document from \p in and returns the resulting dataset.
+ ///
+ /// \param[in] in  input stream providing the XML document
+ /// \returns the dataset, owned by the returned unique_ptr
+ ///
+ static std::unique_ptr<DataSetBase> FromStream(std::istream& in);
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // XMLREADER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "XmlWriter.h"
+#include "pbbam/DataSet.h"
+#include "pugixml/pugixml.hpp"
+#include <fstream>
+#include <iostream>
+#include <map>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace internal {
+
+/// Extracts the namespace prefix from a qualified name.
+///
+/// \param[in] input  element name, e.g. "pbds:SubreadSet"
+/// \returns the text before the first ':'; an empty string if \p input has
+///          no ':' or begins with one
+///
+static
+string Prefix(const string& input)
+{
+    const auto colonPos = input.find(':');
+    const bool hasPrefix = (colonPos != string::npos) && (colonPos > 0);
+    return hasPrefix ? input.substr(0, colonPos) : string();
+}
+
+/// Determines the element name to write for \p node.
+///
+/// Labels taken verbatim from input XML, and labels that already carry a
+/// namespace prefix, are returned as their qualified name. Otherwise the
+/// namespace name registered for the node's XSD type is prepended. If the
+/// node has no XSD type, the type is looked up from the element's local name.
+///
+/// \param[in] node      element being written
+/// \param[in] registry  namespace registry used to resolve prefixes
+/// \returns the name to use for the output XML element
+///
+static
+string OutputName(const DataSetElement& node,
+                  const NamespaceRegistry& registry)
+{
+    // from input XML, or already prefixed: use the name as-is
+    if (node.IsVerbatimLabel() || !node.PrefixLabel().empty())
+        return node.QualifiedNameLabel();
+
+    // otherwise, probably user-generated: prepend the appropriate prefix
+    const string localName = node.LocalNameLabel().to_string();
+    XsdType xsdType = node.Xsd();
+    if (xsdType == XsdType::NONE)
+        xsdType = registry.XsdForElement(localName);
+    return registry.Namespace(xsdType).Name() + ':' + localName;
+}
+
+/// Recursively writes \p node and its subtree beneath \p parentXml.
+///
+/// \param[in]     node             element to convert
+/// \param[in]     registry         namespace registry used to resolve names
+/// \param[in,out] xsdPrefixesUsed  maps each XSD type encountered to the
+///                                 prefix written for it
+/// \param[in,out] parentXml        XML node that receives the new child
+///
+/// Nodes whose output name is empty are skipped, along with their subtree.
+///
+static
+void ToXml(const DataSetElement& node,
+           const NamespaceRegistry& registry,
+           map<XsdType, string>& xsdPrefixesUsed,
+           pugi::xml_node& parentXml)
+{
+    // create child of parent, w/ label & text
+    const string outputName = OutputName(node, registry);
+    if (outputName.empty())
+        return; // error?
+    pugi::xml_node xmlNode = parentXml.append_child(outputName.c_str());
+
+    const auto& text = node.Text();
+    if (!text.empty())
+        xmlNode.text().set(text.c_str());
+
+    // remember which prefix was used for this node's XSD type
+    const string nsPrefix = Prefix(outputName);
+    if (!nsPrefix.empty())
+        xsdPrefixesUsed[node.Xsd()] = nsPrefix;
+
+    // add attributes (unnamed ones are skipped)
+    for (const auto& attribute : node.Attributes()) {
+        const string& attrName = attribute.first;
+        if (attrName.empty())
+            continue;
+        pugi::xml_attribute xmlAttr = xmlNode.append_attribute(attrName.c_str());
+        xmlAttr.set_value(attribute.second.c_str());
+    }
+
+    // additional stuff later? (e.g. comments)
+
+    // recurse into children
+    for (const DataSetElement& child : node.Children())
+        ToXml(child, registry, xsdPrefixesUsed, xmlNode);
+}
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+/// Serializes \p dataset as an XML document and writes it to \p out.
+///
+/// The root element receives the dataset's text and attributes, followed by
+/// all children (recursively). An XML declaration is prepended, and any of
+/// "xmlns", "xmlns:xsi", "xsi:schemaLocation" and "xmlns:<prefix>" (one per
+/// XSD prefix used in the document) that are not already present on the root
+/// are added.
+///
+/// \param[in]  dataset  dataset to serialize
+/// \param[out] out      destination stream
+///
+/// \throws std::runtime_error if no output name can be determined for the
+///         dataset's root element
+///
+void XmlWriter::ToStream(const DataSetBase& dataset,
+                         ostream& out)
+{
+    pugi::xml_document doc;
+
+    const NamespaceRegistry& registry = dataset.Namespaces();
+
+    // create top-level dataset XML node
+    const string rootName = internal::OutputName(dataset, registry);
+    if (rootName.empty())
+        throw std::runtime_error("could not convert dataset node to XML");
+    pugi::xml_node root = doc.append_child(rootName.c_str());
+
+    const string& rootText = dataset.Text();
+    if (!rootText.empty())
+        root.text().set(rootText.c_str());
+
+    // add top-level attributes (unnamed ones are skipped)
+    for (const auto& attribute : dataset.Attributes()) {
+        const string& attrName = attribute.first;
+        if (attrName.empty())
+            continue;
+        pugi::xml_attribute xmlAttr = root.append_attribute(attrName.c_str());
+        xmlAttr.set_value(attribute.second.c_str());
+    }
+
+    // track the prefix used for each XSD type, starting with the root's
+    map<XsdType, string> xsdPrefixesUsed;
+    xsdPrefixesUsed[dataset.Xsd()] = Prefix(rootName);
+
+    // recursively build up the subtree
+    for (const DataSetElement& child : dataset.Children())
+        ToXml(child, registry, xsdPrefixesUsed, root);
+
+    // XML declaration
+    pugi::xml_node declaration = doc.prepend_child(pugi::node_declaration);
+    declaration.append_attribute("version") = "1.0";
+    declaration.append_attribute("encoding") = "utf-8";
+
+    // add XSD namespace attributes, unless already present
+    if (root.attribute("xmlns").empty()) {
+        pugi::xml_attribute added = root.append_attribute("xmlns");
+        added.set_value(registry.DefaultNamespace().Uri().c_str());
+    }
+    if (root.attribute("xmlns:xsi").empty()) {
+        pugi::xml_attribute added = root.append_attribute("xmlns:xsi");
+        added.set_value("http://www.w3.org/2001/XMLSchema-instance");
+    }
+    if (root.attribute("xsi:schemaLocation").empty()) {
+        pugi::xml_attribute added = root.append_attribute("xsi:schemaLocation");
+        added.set_value(registry.DefaultNamespace().Uri().c_str());
+    }
+
+    // one "xmlns:<prefix>" declaration per prefix used in the document
+    static const string xmlnsPrefix = "xmlns:";
+    for (const auto& used : xsdPrefixesUsed) {
+        const XsdType& xsd = used.first;
+        const string& prefix = used.second;
+        if (xsd == XsdType::NONE || prefix.empty())
+            continue;
+
+        const NamespaceInfo& nsInfo = registry.Namespace(xsd);
+        assert(nsInfo.Name() == prefix);
+
+        const string xmlnsName = xmlnsPrefix + prefix;
+        if (root.attribute(xmlnsName.c_str()).empty()) {
+            pugi::xml_attribute added = root.append_attribute(xmlnsName.c_str());
+            added.set_value(nsInfo.Uri().c_str());
+        }
+    }
+
+    // "no escapes" to allow explicit ">" "<" comparison operators in filter parameters
+    // we may remove this if/when comparison is separated from the value
+    doc.save(out, "\t", pugi::format_default | pugi::format_no_escapes, pugi::encoding_utf8);
+}
+
+/// Convenience overload: serializes the dataset owned by \p dataset.
+///
+/// \param[in]  dataset  owning pointer to the dataset; must not be null
+/// \param[out] out      destination stream
+///
+/// \throws std::runtime_error if \p dataset is null (previously this
+///         dereferenced the null pointer)
+///
+void XmlWriter::ToStream(const unique_ptr<DataSetBase>& dataset,
+                         ostream& out)
+{
+    if (!dataset)
+        throw std::runtime_error("could not convert null dataset to XML");
+    ToStream(*dataset, out);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef XMLWRITER_H
+#define XMLWRITER_H
+
+#include <iosfwd>
+#include <memory>
+
+namespace PacBio {
+namespace BAM {
+
+class DataSetBase;
+
+namespace internal {
+
+/// \brief Internal helper that writes a dataset object out as XML.
+///
+/// Exposes static entry points only; no instance state is declared.
+///
+class XmlWriter
+{
+public:
+ /// Writes \p dataset to \p out.
+ static void ToStream(const DataSetBase& dataset, std::ostream& out);
+
+ /// Overload taking the dataset through its owning pointer.
+ static void ToStream(const std::unique_ptr<DataSetBase>& dataset, std::ostream& out);
+};
+
+} // namespace internal
+} // namespace BAM
+} // namespace PacBio
+
+#endif // XMLWRITER_H
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwGroupQuery.cpp
+/// \brief Implements the ZmwGroupQuery class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/ZmwGroupQuery.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/CompositeBamReader.h"
+#include "pbbam/PbiFilterTypes.h"
+#include "MemoryUtils.h"
+#include <algorithm>
+#include <deque>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+/// Private implementation of ZmwGroupQuery.
+///
+/// Holds a sorted, de-duplicated queue of requested ZMWs and a single
+/// filtered reader that is re-targeted at the next ZMW after each group.
+///
+struct ZmwGroupQuery::ZmwGroupQueryPrivate
+{
+    typedef PbiFilterCompositeBamReader<Compare::Zmw> ReaderType;
+    typedef std::unique_ptr<ReaderType> ReaderPtr;
+
+    /// Sorts and de-duplicates the whitelist, then (if it is non-empty)
+    /// opens a reader filtered on its first ZMW.
+    ZmwGroupQueryPrivate(const std::vector<int32_t>& zmwWhitelist,
+                         const DataSet& dataset)
+        : whitelist_(zmwWhitelist.cbegin(), zmwWhitelist.cend())
+        , reader_(nullptr)
+    {
+        std::sort(whitelist_.begin(), whitelist_.end());
+        const auto firstDuplicate = std::unique(whitelist_.begin(), whitelist_.end());
+        whitelist_.erase(firstDuplicate, whitelist_.end());
+
+        // nothing requested: leave reader_ null so GetNext() returns false
+        if (whitelist_.empty())
+            return;
+
+        reader_ = ReaderPtr(new ReaderType(PbiZmwFilter{whitelist_.front()}, dataset));
+        whitelist_.pop_front();
+    }
+
+    /// Fetches all records for the current ZMW, then advances to the next.
+    ///
+    /// \param[out] records  cleared, then filled with the current ZMW's records
+    /// \returns false once no reader remains; true otherwise (even if no
+    ///          records matched the current ZMW)
+    bool GetNext(std::vector<BamRecord>& records)
+    {
+        records.clear();
+        if (reader_ == nullptr)
+            return false;
+
+        // get all records matching ZMW
+        for (BamRecord record; reader_->GetNext(record); )
+            records.push_back(record);
+
+        if (whitelist_.empty()) {
+            // no ZMWs left: destroy reader, next call will return false
+            reader_.reset();
+        } else {
+            // point the reader at the next ZMW
+            reader_->Filter(PbiZmwFilter{whitelist_.front()});
+            whitelist_.pop_front();
+        }
+        return true;
+    }
+
+    std::deque<int32_t> whitelist_;  ///< ZMWs not yet handed to the reader
+    ReaderPtr reader_;               ///< null when there is nothing left to read
+};
+
+/// Creates a query yielding records grouped by ZMW; all state lives in the
+/// private implementation.
+ZmwGroupQuery::ZmwGroupQuery(const std::vector<int32_t>& zmwWhitelist,
+                             const DataSet& dataset)
+    : internal::IGroupQuery()
+    , d_(new ZmwGroupQueryPrivate(zmwWhitelist, dataset))
+{
+}
+
+/// Defined out-of-line, after the private implementation type is complete.
+ZmwGroupQuery::~ZmwGroupQuery()
+{
+}
+
+/// \returns the private implementation's GetNext() result for \p records
+bool ZmwGroupQuery::GetNext(std::vector<BamRecord>& records)
+{
+    return d_->GetNext(records);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwQuery.cpp
+/// \brief Implements the ZmwQuery class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/ZmwQuery.h"
+#include "pbbam/PbiFilterTypes.h"
+#include "pbbam/CompositeBamReader.h"
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+/// Private implementation of ZmwQuery: a single composite reader filtered on
+/// the full ZMW whitelist.
+struct ZmwQuery::ZmwQueryPrivate
+{
+    typedef PbiFilterCompositeBamReader<Compare::Zmw> ReaderType;
+
+    /// Builds the reader over \p dataset with a filter on \p zmwWhitelist.
+    ZmwQueryPrivate(const std::vector<int32_t>& zmwWhitelist,
+                    const DataSet& dataset)
+        : reader_(PbiZmwFilter(zmwWhitelist), dataset)
+    {
+    }
+
+    ReaderType reader_;
+};
+
+/// Creates a query over the records matching the whitelisted ZMWs; all state
+/// lives in the private implementation.
+ZmwQuery::ZmwQuery(const std::vector<int32_t>& zmwWhitelist,
+                   const DataSet& dataset)
+    : internal::IQuery()
+    , d_(new ZmwQueryPrivate(zmwWhitelist, dataset))
+{
+}
+
+/// Defined out-of-line, after the private implementation type is complete.
+ZmwQuery::~ZmwQuery()
+{
+}
+
+/// \returns the underlying reader's GetNext() result for \p r
+bool ZmwQuery::GetNext(BamRecord &r)
+{
+    return d_->reader_.GetNext(r);
+}
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwReadStitcher.cpp
+/// \brief Implements the ZmwReadStitcher class.
+//
+// Author: Derek Barnett
+
+#include "pbbam/virtual/ZmwReadStitcher.h"
+#include "pbbam/DataSet.h"
+#include "pbbam/EntireFileQuery.h"
+#include "pbbam/PbiFilter.h"
+#include "pbbam/PbiFilterQuery.h"
+#include "VirtualZmwReader.h"
+#include <deque>
+#include <stdexcept>
+#include <utility>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+
+/// Private implementation of ZmwReadStitcher.
+///
+/// Maintains a queue of (primary BAM, scraps BAM) filename pairs and a single
+/// active VirtualZmwReader. When the active reader runs out of data, the next
+/// source pair that has data is opened. Once all sources are exhausted,
+/// currentReader_ is null.
+///
+struct ZmwReadStitcher::ZmwReadStitcherPrivate
+{
+public:
+    /// Sets up a single primary/scraps source pair and opens it.
+    ZmwReadStitcherPrivate(const string& primaryBamFilePath,
+                           const string& scrapsBamFilePath,
+                           const PbiFilter& filter)
+        : filter_(filter)
+    {
+        sources_.push_back(std::make_pair(primaryBamFilePath, scrapsBamFilePath));
+        OpenNextReader();
+    }
+
+    /// Queues one source pair for each subread/HQ-region BAM resource in
+    /// \p dataset that has an associated scraps BAM, then opens the first
+    /// pair that has data. The filter is taken from the dataset.
+    ZmwReadStitcherPrivate(const DataSet& dataset)
+        : filter_(PbiFilter::FromDataSet(dataset))
+    {
+        // set up source queue
+        string primaryFn;
+        string scrapsFn;
+        const ExternalResources& resources = dataset.ExternalResources();
+        for (const ExternalResource& resource : resources) {
+
+            primaryFn.clear();
+            scrapsFn.clear();
+
+            // if resource is possible "primary" BAM
+            const auto& metatype = resource.MetaType();
+            if (metatype == "PacBio.SubreadFile.SubreadBamFile" ||
+                metatype == "PacBio.SubreadFile.HqRegionBamFile")
+            {
+                // possible resolve relative path
+                primaryFn = dataset.ResolvePath(resource.ResourceId());
+
+                // check for associated scraps file (first match wins)
+                const ExternalResources& childResources = resource.ExternalResources();
+                for (const ExternalResource& childResource : childResources) {
+                    const auto& childMetatype = childResource.MetaType();
+                    if (childMetatype == "PacBio.SubreadFile.ScrapsBamFile" ||
+                        childMetatype == "PacBio.SubreadFile.HqScrapsBamFile")
+                    {
+                        // possible resolve relative path
+                        scrapsFn = dataset.ResolvePath(childResource.ResourceId());
+                        break;
+                    }
+                }
+            }
+
+            // queue up source for later (only complete pairs)
+            if (!primaryFn.empty() && !scrapsFn.empty())
+                sources_.push_back(make_pair(primaryFn,scrapsFn));
+        }
+
+        OpenNextReader();
+    }
+
+public:
+    /// \returns true if a reader is active and it has more data
+    bool HasNext(void) const
+    { return (currentReader_ && currentReader_->HasNext()); }
+
+    /// Returns the next stitched record, moving on to the next source once
+    /// the current reader is exhausted.
+    ///
+    /// \throws std::runtime_error if no reader is active
+    VirtualZmwBamRecord Next(void)
+    {
+        if (currentReader_) {
+            // non-const so the result can be moved out on return
+            auto result = currentReader_->Next();
+            if (!currentReader_->HasNext())
+                OpenNextReader();
+            return result;
+        }
+
+        // no reader active
+        const string msg = { "no readers active, make sure you use "
+                             "ZmwReadStitcher::HasNext before "
+                             "requesting next record"
+                           };
+        throw std::runtime_error(msg);
+    }
+
+    /// Returns the next group of raw records, moving on to the next source
+    /// once the current reader is exhausted.
+    ///
+    /// \throws std::runtime_error if no reader is active
+    vector<BamRecord> NextRaw(void)
+    {
+        if (currentReader_) {
+            // non-const so the result can be moved out on return
+            auto result = currentReader_->NextRaw();
+            if (!currentReader_->HasNext())
+                OpenNextReader();
+            return result;
+        }
+
+        // no reader active
+        const string msg = { "no readers active, make sure you use "
+                             "ZmwReadStitcher::HasNext before "
+                             "requesting next group of records"
+                           };
+        throw std::runtime_error(msg);
+    }
+
+    /// \returns the primary BAM header of the active reader
+    /// \throws std::runtime_error if no reader is active (no usable sources,
+    ///         or all data already consumed); previously this dereferenced
+    ///         a null pointer
+    BamHeader PrimaryHeader(void) const
+    {
+        if (!currentReader_)
+            throw std::runtime_error("no readers active, cannot fetch primary BAM header");
+        return currentReader_->PrimaryHeader();
+    }
+
+    /// \returns the scraps BAM header of the active reader
+    /// \throws std::runtime_error if no reader is active (no usable sources,
+    ///         or all data already consumed); previously this dereferenced
+    ///         a null pointer
+    BamHeader ScrapsHeader(void) const
+    {
+        if (!currentReader_)
+            throw std::runtime_error("no readers active, cannot fetch scraps BAM header");
+        return currentReader_->ScrapsHeader();
+    }
+
+private:
+    std::deque< std::pair<std::string, std::string> > sources_;  ///< pending (primary, scraps) pairs
+    std::unique_ptr<VirtualZmwReader> currentReader_;            ///< active reader, may be null
+    PbiFilter filter_;                                           ///< filter applied to every reader
+
+private:
+    /// Discards the active reader and opens the next queued source pair that
+    /// has data. Pairs are consumed from the front of the queue. If the queue
+    /// is empty on entry, currentReader_ is left null.
+    void OpenNextReader(void)
+    {
+        currentReader_.reset(nullptr);
+
+        // find next source pair with data
+        while(!sources_.empty()) {
+            const auto nextSource = sources_.front();
+            sources_.pop_front();
+
+            currentReader_.reset(new VirtualZmwReader(nextSource.first,
+                                                      nextSource.second,
+                                                      filter_));
+            if (currentReader_->HasNext())
+                return;
+        }
+    }
+};
+
+} // namespace BAM
+} // namespace PacBio
+
+// --------------------------------
+// ZmwReadStitcher implementation
+// --------------------------------
+
+/// Stitches reads from one primary/scraps BAM pair, using an empty filter.
+ZmwReadStitcher::ZmwReadStitcher(const string& primaryBamFilePath,
+                                 const string& scrapsBamFilePath)
+    : ZmwReadStitcher(primaryBamFilePath, scrapsBamFilePath, PbiFilter{})
+{
+}
+
+/// Stitches reads from one primary/scraps BAM pair, applying \p filter.
+ZmwReadStitcher::ZmwReadStitcher(const string& primaryBamFilePath,
+                                 const string& scrapsBamFilePath,
+                                 const PbiFilter& filter)
+    : d_(new ZmwReadStitcherPrivate(primaryBamFilePath, scrapsBamFilePath, filter))
+{
+}
+
+/// Stitches reads from the BAM resources described by \p dataset.
+ZmwReadStitcher::ZmwReadStitcher(const DataSet& dataset)
+    : d_(new ZmwReadStitcherPrivate(dataset))
+{
+}
+
+/// Defined out-of-line, after the private implementation type is complete.
+ZmwReadStitcher::~ZmwReadStitcher()
+{
+}
+
+/// \returns the private implementation's HasNext() result
+bool ZmwReadStitcher::HasNext()
+{
+    return d_->HasNext();
+}
+
+/// \returns the private implementation's Next() result
+VirtualZmwBamRecord ZmwReadStitcher::Next()
+{
+    return d_->Next();
+}
+
+/// \returns the private implementation's NextRaw() result
+vector<BamRecord> ZmwReadStitcher::NextRaw()
+{
+    return d_->NextRaw();
+}
+
+/// \returns DeepCopy() of the primary header reported by the implementation
+BamHeader ZmwReadStitcher::PrimaryHeader() const
+{
+    return d_->PrimaryHeader().DeepCopy();
+}
+
+/// \returns DeepCopy() of the scraps header reported by the implementation
+BamHeader ZmwReadStitcher::ScrapsHeader() const
+{
+    return d_->ScrapsHeader().DeepCopy();
+}
+
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+//
+// File Description
+/// \file ZmwTypeMap.cpp
+/// \brief Implements the ZmwTypeMap class.
+//
+// Author: Armin Töpfer
+
+#include "pbbam/ZmwTypeMap.h"
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+/// Lookup table from a single-character code to the ZmwType it denotes.
+std::map<char, ZmwType> ZmwTypeMap::ParseChar =
+{
+    { 'C', ZmwType::CONTROL   },
+    { 'M', ZmwType::MALFORMED },
+    { 'N', ZmwType::NORMAL    },
+    { 'S', ZmwType::SENTINEL  }
+};
--- /dev/null
+
+# headers: PacBioBAM_H lists the library's header files, grouped by role; paths are rooted at PacBioBAM_IncludeDir or PacBioBAM_SourceDir (both expected to be set before this point - not shown here)
+set( PacBioBAM_H
+
+ # API headers (directly under pbbam/ in the include directory)
+ ${PacBioBAM_IncludeDir}/pbbam/Accuracy.h
+ ${PacBioBAM_IncludeDir}/pbbam/AlignmentPrinter.h
+ ${PacBioBAM_IncludeDir}/pbbam/BamFile.h
+ ${PacBioBAM_IncludeDir}/pbbam/BamHeader.h
+ ${PacBioBAM_IncludeDir}/pbbam/BamRecord.h
+ ${PacBioBAM_IncludeDir}/pbbam/BamRecordBuilder.h
+ ${PacBioBAM_IncludeDir}/pbbam/BamRecordImpl.h
+ ${PacBioBAM_IncludeDir}/pbbam/BamRecordTag.h
+ ${PacBioBAM_IncludeDir}/pbbam/BamRecordView.h
+ ${PacBioBAM_IncludeDir}/pbbam/BamTagCodec.h
+ ${PacBioBAM_IncludeDir}/pbbam/BaiIndexedBamReader.h
+ ${PacBioBAM_IncludeDir}/pbbam/BamReader.h
+ ${PacBioBAM_IncludeDir}/pbbam/CompositeBamReader.h
+ ${PacBioBAM_IncludeDir}/pbbam/BamWriter.h
+ ${PacBioBAM_IncludeDir}/pbbam/BarcodeQuery.h
+ ${PacBioBAM_IncludeDir}/pbbam/Cigar.h
+ ${PacBioBAM_IncludeDir}/pbbam/CigarOperation.h
+ ${PacBioBAM_IncludeDir}/pbbam/ClipType.h
+ ${PacBioBAM_IncludeDir}/pbbam/Compare.h
+ ${PacBioBAM_IncludeDir}/pbbam/Config.h
+ ${PacBioBAM_IncludeDir}/pbbam/DataSet.h
+ ${PacBioBAM_IncludeDir}/pbbam/DataSetTypes.h
+ ${PacBioBAM_IncludeDir}/pbbam/DataSetXsd.h
+ ${PacBioBAM_IncludeDir}/pbbam/EntireFileQuery.h
+ ${PacBioBAM_IncludeDir}/pbbam/FastaReader.h
+ ${PacBioBAM_IncludeDir}/pbbam/FastaSequence.h
+ ${PacBioBAM_IncludeDir}/pbbam/FrameEncodingType.h
+ ${PacBioBAM_IncludeDir}/pbbam/Frames.h
+ ${PacBioBAM_IncludeDir}/pbbam/GenomicInterval.h
+ ${PacBioBAM_IncludeDir}/pbbam/GenomicIntervalQuery.h
+ ${PacBioBAM_IncludeDir}/pbbam/IndexedFastaReader.h
+ ${PacBioBAM_IncludeDir}/pbbam/Interval.h
+ ${PacBioBAM_IncludeDir}/pbbam/IRecordWriter.h
+ ${PacBioBAM_IncludeDir}/pbbam/LocalContextFlags.h
+ ${PacBioBAM_IncludeDir}/pbbam/MD5.h
+ ${PacBioBAM_IncludeDir}/pbbam/Orientation.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiBasicTypes.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiBuilder.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiFile.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiFilter.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiFilterQuery.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiFilterTypes.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiIndex.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiIndexedBamReader.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiLookupData.h
+ ${PacBioBAM_IncludeDir}/pbbam/PbiRawData.h
+ ${PacBioBAM_IncludeDir}/pbbam/Position.h
+ ${PacBioBAM_IncludeDir}/pbbam/ProgramInfo.h
+ ${PacBioBAM_IncludeDir}/pbbam/PulseBehavior.h
+ ${PacBioBAM_IncludeDir}/pbbam/QNameQuery.h
+ ${PacBioBAM_IncludeDir}/pbbam/QualityValue.h
+ ${PacBioBAM_IncludeDir}/pbbam/QualityValues.h
+ ${PacBioBAM_IncludeDir}/pbbam/ReadAccuracyQuery.h
+ ${PacBioBAM_IncludeDir}/pbbam/ReadGroupInfo.h
+ ${PacBioBAM_IncludeDir}/pbbam/RecordType.h
+ ${PacBioBAM_IncludeDir}/pbbam/SamTagCodec.h
+ ${PacBioBAM_IncludeDir}/pbbam/SamWriter.h
+ ${PacBioBAM_IncludeDir}/pbbam/SequenceInfo.h
+ ${PacBioBAM_IncludeDir}/pbbam/Strand.h
+ ${PacBioBAM_IncludeDir}/pbbam/SubreadLengthQuery.h
+ ${PacBioBAM_IncludeDir}/pbbam/Tag.h
+ ${PacBioBAM_IncludeDir}/pbbam/TagCollection.h
+# ${PacBioBAM_IncludeDir}/pbbam/UnmappedReadsQuery.h   (disabled entry; UnmappedReadsQuery.cpp is likewise commented out of PacBioBAM_CPP)
+ ${PacBioBAM_IncludeDir}/pbbam/Validator.h
+ ${PacBioBAM_IncludeDir}/pbbam/ZmwGroupQuery.h
+ ${PacBioBAM_IncludeDir}/pbbam/ZmwQuery.h
+ ${PacBioBAM_IncludeDir}/pbbam/ZmwType.h
+ ${PacBioBAM_IncludeDir}/pbbam/ZmwTypeMap.h
+
+ # exception headers (pbbam/exception/)
+ ${PacBioBAM_IncludeDir}/pbbam/exception/InvalidSequencingChemistryException.h
+ ${PacBioBAM_IncludeDir}/pbbam/exception/ValidationException.h
+
+ # API-internal headers & inline files (pbbam/internal/)
+ ${PacBioBAM_IncludeDir}/pbbam/internal/Accuracy.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/BamHeader.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/BamRecord.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/BamRecordBuilder.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/BamRecordImpl.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/BamRecordView.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/Cigar.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/CigarOperation.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/Compare.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/CompositeBamReader.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/DataSet.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/DataSetBaseTypes.h
+ ${PacBioBAM_IncludeDir}/pbbam/internal/DataSetBaseTypes.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/DataSetElement.h
+ ${PacBioBAM_IncludeDir}/pbbam/internal/DataSetElement.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/DataSetListElement.h
+ ${PacBioBAM_IncludeDir}/pbbam/internal/DataSetListElement.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/DataSetTypes.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/FastaSequence.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/Frames.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/GenomicInterval.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/Interval.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/PbiBasicTypes.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/PbiFilter.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/PbiFilterTypes.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/PbiIndex.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/PbiLookupData.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/PbiRawData.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/ProgramInfo.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/QualityValue.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/QualityValues.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/QueryBase.h
+ ${PacBioBAM_IncludeDir}/pbbam/internal/QueryBase.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/ReadGroupInfo.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/SequenceInfo.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/Tag.inl
+ ${PacBioBAM_IncludeDir}/pbbam/internal/Validator.inl
+
+ # virtual headers (pbbam/virtual/)
+ ${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualPolymeraseBamRecord.h
+ ${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualPolymeraseCompositeReader.h
+ ${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualPolymeraseReader.h
+ ${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualRegion.h
+ ${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualRegionType.h
+ ${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualRegionTypeMap.h
+ ${PacBioBAM_IncludeDir}/pbbam/virtual/VirtualZmwBamRecord.h
+ ${PacBioBAM_IncludeDir}/pbbam/virtual/WhitelistedZmwReadStitcher.h
+ ${PacBioBAM_IncludeDir}/pbbam/virtual/ZmwReadStitcher.h
+ ${PacBioBAM_IncludeDir}/pbbam/virtual/ZmwWhitelistVirtualReader.h
+
+ # library-internal headers (kept in the source directory rather than the include directory)
+ ${PacBioBAM_SourceDir}/AssertUtils.h
+ ${PacBioBAM_SourceDir}/BamRecordTags.h
+ ${PacBioBAM_SourceDir}/ChemistryTable.h
+ ${PacBioBAM_SourceDir}/DataSetIO.h
+ ${PacBioBAM_SourceDir}/DataSetUtils.h
+ ${PacBioBAM_SourceDir}/EnumClassHash.h
+ ${PacBioBAM_SourceDir}/FileProducer.h
+ ${PacBioBAM_SourceDir}/FileUtils.h
+ ${PacBioBAM_SourceDir}/FofnReader.h
+ ${PacBioBAM_SourceDir}/MemoryUtils.h
+ ${PacBioBAM_SourceDir}/PbiIndexIO.h
+ ${PacBioBAM_SourceDir}/Pulse2BaseCache.h
+ ${PacBioBAM_SourceDir}/SequenceUtils.h
+ ${PacBioBAM_SourceDir}/StringUtils.h
+ ${PacBioBAM_SourceDir}/TimeUtils.h
+ ${PacBioBAM_SourceDir}/ValidationErrors.h
+ ${PacBioBAM_SourceDir}/Version.h
+ ${PacBioBAM_SourceDir}/VirtualZmwCompositeReader.h
+ ${PacBioBAM_SourceDir}/VirtualZmwReader.h
+ ${PacBioBAM_SourceDir}/XmlReader.h
+ ${PacBioBAM_SourceDir}/XmlWriter.h
+ ${PacBioBAM_SourceDir}/pugixml/pugiconfig.hpp
+ ${PacBioBAM_SourceDir}/pugixml/pugixml.hpp
+)
+
+# sources: PacBioBAM_CPP lists the implementation files, all rooted at PacBioBAM_SourceDir (presumably consumed by the library target that includes this file - not shown here)
+set( PacBioBAM_CPP
+
+ ${PacBioBAM_SourceDir}/Accuracy.cpp
+ ${PacBioBAM_SourceDir}/AlignmentPrinter.cpp
+ ${PacBioBAM_SourceDir}/AssertUtils.cpp
+ ${PacBioBAM_SourceDir}/BaiIndexedBamReader.cpp
+ ${PacBioBAM_SourceDir}/BamFile.cpp
+ ${PacBioBAM_SourceDir}/BamHeader.cpp
+ ${PacBioBAM_SourceDir}/BamReader.cpp
+ ${PacBioBAM_SourceDir}/BamRecord.cpp
+ ${PacBioBAM_SourceDir}/BamRecordBuilder.cpp
+ ${PacBioBAM_SourceDir}/BamRecordImpl.cpp
+ ${PacBioBAM_SourceDir}/BamRecordTags.cpp
+ ${PacBioBAM_SourceDir}/BamTagCodec.cpp
+ ${PacBioBAM_SourceDir}/BamWriter.cpp
+ ${PacBioBAM_SourceDir}/BarcodeQuery.cpp
+ ${PacBioBAM_SourceDir}/ChemistryTable.cpp
+ ${PacBioBAM_SourceDir}/Cigar.cpp
+ ${PacBioBAM_SourceDir}/CigarOperation.cpp
+ ${PacBioBAM_SourceDir}/Compare.cpp
+ ${PacBioBAM_SourceDir}/Config.cpp
+ ${PacBioBAM_SourceDir}/DataSet.cpp
+ ${PacBioBAM_SourceDir}/DataSetBaseTypes.cpp
+ ${PacBioBAM_SourceDir}/DataSetElement.cpp
+ ${PacBioBAM_SourceDir}/DataSetIO.cpp
+ ${PacBioBAM_SourceDir}/DataSetTypes.cpp
+ ${PacBioBAM_SourceDir}/DataSetXsd.cpp
+ ${PacBioBAM_SourceDir}/EntireFileQuery.cpp
+ ${PacBioBAM_SourceDir}/FastaReader.cpp
+ ${PacBioBAM_SourceDir}/FileProducer.cpp
+ ${PacBioBAM_SourceDir}/FileUtils.cpp
+ ${PacBioBAM_SourceDir}/FofnReader.cpp
+ ${PacBioBAM_SourceDir}/Frames.cpp
+ ${PacBioBAM_SourceDir}/GenomicInterval.cpp
+ ${PacBioBAM_SourceDir}/GenomicIntervalQuery.cpp
+ ${PacBioBAM_SourceDir}/IndexedFastaReader.cpp
+ ${PacBioBAM_SourceDir}/IRecordWriter.cpp
+ ${PacBioBAM_SourceDir}/MD5.cpp
+ ${PacBioBAM_SourceDir}/MemoryUtils.cpp
+ ${PacBioBAM_SourceDir}/PbiBuilder.cpp
+ ${PacBioBAM_SourceDir}/PbiFile.cpp
+ ${PacBioBAM_SourceDir}/PbiFilter.cpp
+ ${PacBioBAM_SourceDir}/PbiFilterQuery.cpp
+ ${PacBioBAM_SourceDir}/PbiFilterTypes.cpp
+ ${PacBioBAM_SourceDir}/PbiIndex.cpp
+ ${PacBioBAM_SourceDir}/PbiIndexedBamReader.cpp
+ ${PacBioBAM_SourceDir}/PbiIndexIO.cpp
+ ${PacBioBAM_SourceDir}/PbiRawData.cpp
+ ${PacBioBAM_SourceDir}/ProgramInfo.cpp
+ ${PacBioBAM_SourceDir}/QNameQuery.cpp
+ ${PacBioBAM_SourceDir}/QualityValue.cpp
+ ${PacBioBAM_SourceDir}/ReadAccuracyQuery.cpp
+ ${PacBioBAM_SourceDir}/ReadGroupInfo.cpp
+ ${PacBioBAM_SourceDir}/SamTagCodec.cpp
+ ${PacBioBAM_SourceDir}/SamWriter.cpp
+ ${PacBioBAM_SourceDir}/SequenceInfo.cpp
+ ${PacBioBAM_SourceDir}/SubreadLengthQuery.cpp
+ ${PacBioBAM_SourceDir}/Tag.cpp
+ ${PacBioBAM_SourceDir}/TagCollection.cpp
+# ${PacBioBAM_SourceDir}/UnmappedReadsQuery.cpp   (disabled entry; UnmappedReadsQuery.h is likewise commented out of PacBioBAM_H)
+ ${PacBioBAM_SourceDir}/Validator.cpp
+ ${PacBioBAM_SourceDir}/ValidationErrors.cpp
+ ${PacBioBAM_SourceDir}/ValidationException.cpp
+ ${PacBioBAM_SourceDir}/Version.cpp
+ ${PacBioBAM_SourceDir}/VirtualZmwBamRecord.cpp
+ ${PacBioBAM_SourceDir}/VirtualZmwCompositeReader.cpp
+ ${PacBioBAM_SourceDir}/VirtualZmwReader.cpp
+ ${PacBioBAM_SourceDir}/VirtualRegionTypeMap.cpp
+ ${PacBioBAM_SourceDir}/XmlReader.cpp
+ ${PacBioBAM_SourceDir}/XmlWriter.cpp
+ ${PacBioBAM_SourceDir}/WhitelistedZmwReadStitcher.cpp
+ ${PacBioBAM_SourceDir}/ZmwGroupQuery.cpp
+ ${PacBioBAM_SourceDir}/ZmwReadStitcher.cpp
+ ${PacBioBAM_SourceDir}/ZmwQuery.cpp
+ ${PacBioBAM_SourceDir}/ZmwTypeMap.cpp
+
+ # XML I/O (the pugixml source kept under pugixml/ in the source directory)
+ ${PacBioBAM_SourceDir}/pugixml/pugixml.cpp
+)
--- /dev/null
+/**
+ * pugixml parser - version 1.5
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Report bugs and download new versions at http://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
+ */
+
+#ifndef HEADER_PUGICONFIG_HPP
+#define HEADER_PUGICONFIG_HPP
+
+// Uncomment this to enable wchar_t mode
+// #define PUGIXML_WCHAR_MODE
+
+// Uncomment this to disable XPath
+// #define PUGIXML_NO_XPATH
+
+// Uncomment this to disable STL
+// #define PUGIXML_NO_STL
+
+// Uncomment this to disable exceptions
+// #define PUGIXML_NO_EXCEPTIONS
+
+// Set this to control attributes for public classes/functions, e.g.:
+// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
+// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
+// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
+// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
+
+// Tune these constants to adjust memory-related behavior
+// #define PUGIXML_MEMORY_PAGE_SIZE 32768
+// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
+// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
+
+// Uncomment this to switch to header-only version
+// #define PUGIXML_HEADER_ONLY
+
+// Uncomment this to enable long long support
+// #define PUGIXML_HAS_LONG_LONG
+
+#endif
+
+/**
+ * Copyright (c) 2006-2014 Arseny Kapoulkine
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
--- /dev/null
+/**
+ * pugixml parser - version 1.5
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Report bugs and download new versions at http://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
+ */
+
+#ifndef SOURCE_PUGIXML_CPP
+#define SOURCE_PUGIXML_CPP
+
+#include "pugixml.hpp"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#ifdef PUGIXML_WCHAR_MODE
+# include <wchar.h>
+#endif
+
+#ifndef PUGIXML_NO_XPATH
+# include <math.h>
+# include <float.h>
+# ifdef PUGIXML_NO_EXCEPTIONS
+# include <setjmp.h>
+# endif
+#endif
+
+#ifndef PUGIXML_NO_STL
+# include <istream>
+# include <ostream>
+# include <string>
+#endif
+
+// For placement new
+#include <new>
+
+#ifdef _MSC_VER
+# pragma warning(push)
+# pragma warning(disable: 4127) // conditional expression is constant
+# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
+# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
+# pragma warning(disable: 4702) // unreachable code
+# pragma warning(disable: 4996) // this function or variable may be unsafe
+# pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
+#endif
+
+#ifdef __INTEL_COMPILER
+# pragma warning(disable: 177) // function was declared but never referenced
+# pragma warning(disable: 279) // controlling expression is constant
+# pragma warning(disable: 1478 1786) // function was declared "deprecated"
+# pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
+#endif
+
+#if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
+# pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
+#endif
+
+#ifdef __BORLANDC__
+# pragma option push
+# pragma warn -8008 // condition is always false
+# pragma warn -8066 // unreachable code
+#endif
+
+#ifdef __SNC__
+// Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
+# pragma diag_suppress=178 // function was declared but never referenced
+# pragma diag_suppress=237 // controlling expression is constant
+#endif
+
+// Inlining controls: PUGI__NO_INLINE expands to the compiler-specific "never inline" attribute, or to nothing when none is known
+#if defined(_MSC_VER) && _MSC_VER >= 1300
+# define PUGI__NO_INLINE __declspec(noinline)
+#elif defined(__GNUC__)
+# define PUGI__NO_INLINE __attribute__((noinline))
+#else
+# define PUGI__NO_INLINE
+#endif
+
+// Branch weight controls: PUGI__UNLIKELY(cond) hints to GCC-compatible compilers that cond is expected to be 0; elsewhere it is just (cond)
+#if defined(__GNUC__)
+# define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
+#else
+# define PUGI__UNLIKELY(cond) (cond)
+#endif
+
+// Simple static assertion: a false cond declares an array of size -1, which does not compile; the macro expands to a braced block, so it is usable only inside a function body
+#define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
+
+// Digital Mars C++ bug workaround for passing char loaded from memory via stack: PUGI__DMC_VOLATILE is 'volatile' on that compiler and empty everywhere else
+#ifdef __DMC__
+# define PUGI__DMC_VOLATILE volatile
+#else
+# define PUGI__DMC_VOLATILE
+#endif
+
+// Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
+#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
+using std::memcpy;
+using std::memmove;
+#endif
+
+// In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
+#if defined(_MSC_VER) && !defined(__S3E__)
+# define PUGI__MSVC_CRT_VERSION _MSC_VER
+#endif
+
+#ifdef PUGIXML_HEADER_ONLY // header-only build: helpers live in pugi::impl and are declared inline
+# define PUGI__NS_BEGIN namespace pugi { namespace impl {
+# define PUGI__NS_END } }
+# define PUGI__FN inline
+# define PUGI__FN_NO_INLINE inline
+#else // compiled build: helpers additionally go into an anonymous namespace, except on MSVC6
+# if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
+# define PUGI__NS_BEGIN namespace pugi { namespace impl {
+# define PUGI__NS_END } }
+# else
+# define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
+# define PUGI__NS_END } } }
+# endif
+# define PUGI__FN
+# define PUGI__FN_NO_INLINE PUGI__NO_INLINE
+#endif
+
+// uintptr_t: taken from <stdint.h>, except on MSVC with _MSC_VER below 1600 where the needed typedefs are supplied by hand
+#if !defined(_MSC_VER) || _MSC_VER >= 1600
+# include <stdint.h>
+#else
+# ifndef _UINTPTR_T_DEFINED
+// No native uintptr_t in MSVC6 and in some WinCE versions
+typedef size_t uintptr_t;
+#define _UINTPTR_T_DEFINED
+# endif
+PUGI__NS_BEGIN
+ typedef unsigned __int8 uint8_t; // fixed-width fallbacks, declared inside the implementation namespace
+ typedef unsigned __int16 uint16_t;
+ typedef unsigned __int32 uint32_t;
+PUGI__NS_END
+#endif
+
+// Memory allocation
+PUGI__NS_BEGIN
+ // Default allocation hook: hands the request to malloc and returns its result unchanged
+ PUGI__FN void* default_allocate(size_t size)
+ {
+     void* block = malloc(size);
+     return block;
+ }
+
+ // Default deallocation hook: hands the pointer to free
+ PUGI__FN void default_deallocate(void* ptr)
+ {
+     free(ptr);
+ }
+
+ template <typename T> // T is not used by the members; see the comment above the static definitions below for why this is a template
+ struct xml_memory_management_function_storage
+ {
+ static allocation_function allocate; // current allocation hook
+ static deallocation_function deallocate; // current deallocation hook
+ };
+
+ // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
+ // Without a template<> we'll get multiple definitions of the same static
+ template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate; // initial value: the malloc-based default
+ template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate; // initial value: the free-based default
+
+ typedef xml_memory_management_function_storage<int> xml_memory; // the instantiation the page allocator below goes through
+PUGI__NS_END
+
+// String utilities
+PUGI__NS_BEGIN
+ // Length of a zero-terminated char_t string, excluding the terminator (s must not be null)
+ PUGI__FN size_t strlength(const char_t* s)
+ {
+     assert(s);
+
+ #ifdef PUGIXML_WCHAR_MODE
+     const size_t length = wcslen(s);
+ #else
+     const size_t length = strlen(s);
+ #endif
+
+     return length;
+ }
+
+ // True when the two zero-terminated strings are identical (neither may be null)
+ PUGI__FN bool strequal(const char_t* src, const char_t* dst)
+ {
+     assert(src && dst);
+
+ #ifdef PUGIXML_WCHAR_MODE
+     const int order = wcscmp(src, dst);
+ #else
+     const int order = strcmp(src, dst);
+ #endif
+
+     return order == 0;
+ }
+
+ // True when the zero-terminated lhs consists of exactly the count characters starting at rhs
+ PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
+ {
+     size_t matched = 0;
+
+     while (matched < count && lhs[matched] == rhs[matched])
+         ++matched;
+
+     // a mismatch inside the range fails outright; otherwise lhs has to end right after it
+     return matched == count && lhs[count] == 0;
+ }
+
+ // Length of a zero-terminated wide string; counted by hand unless wchar_t mode makes wcslen available
+ PUGI__FN size_t strlength_wide(const wchar_t* s)
+ {
+     assert(s);
+
+ #ifdef PUGIXML_WCHAR_MODE
+     return wcslen(s);
+ #else
+     size_t length = 0;
+     while (s[length]) ++length;
+     return length;
+ #endif
+ }
+
+#ifdef PUGIXML_WCHAR_MODE
+ // Copies source into dest one character at a time, widening each char to wchar_t,
+ // and zero-terminates dest; all symbols are assumed to be ASCII
+ PUGI__FN void widen_ascii(wchar_t* dest, const char* source)
+ {
+     while (*source) *dest++ = *source++;
+     *dest = 0;
+ }
+#endif
+PUGI__NS_END
+
+#if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
+// auto_ptr-like buffer holder for exception recovery
+PUGI__NS_BEGIN
+ // Scope guard for a raw buffer: the destructor passes data to deleter unless
+ // release() has been called (or data was null to begin with)
+ struct buffer_holder
+ {
+     void* data;
+     void (*deleter)(void*);
+
+     buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_)
+     {
+     }
+
+     ~buffer_holder()
+     {
+         if (!data) return;
+
+         deleter(data);
+     }
+
+     // Gives up the buffer: returns the held pointer and leaves the holder empty
+     void* release()
+     {
+         void* owned = data;
+         data = 0;
+         return owned;
+     }
+ };
+PUGI__NS_END
+#endif
+
+PUGI__NS_BEGIN
+ static const size_t xml_memory_page_size = // data bytes in a regular page; overridable through PUGIXML_MEMORY_PAGE_SIZE
+ #ifdef PUGIXML_MEMORY_PAGE_SIZE
+ PUGIXML_MEMORY_PAGE_SIZE
+ #else
+ 32768
+ #endif
+ ;
+
+ static const uintptr_t xml_memory_page_alignment = 64; // pages start on a multiple of this, which leaves the low 6 bits of a page address free for flags
+ static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1); // keeps the page address part of a header word
+ static const uintptr_t xml_memory_page_contents_shared_mask = 32; // header flag (bit value 32)
+ static const uintptr_t xml_memory_page_name_allocated_mask = 16; // header flag: when set, destroy_node/destroy_attribute release the name via deallocate_string
+ static const uintptr_t xml_memory_page_value_allocated_mask = 8; // header flag: same for the value string
+ static const uintptr_t xml_memory_page_type_mask = 7; // low 3 header bits: node type minus one (see PUGI__NODETYPE)
+ static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
+ static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
+
+ #define PUGI__NODETYPE(n) static_cast<xml_node_type>(((n)->header & impl::xml_memory_page_type_mask) + 1) // recovers the xml_node_type stored in the low header bits
+
+ struct xml_allocator;
+
+ struct xml_memory_page // bookkeeping header at the start of every allocator page; the page's data bytes follow it in memory
+ {
+ static xml_memory_page* construct(void* memory) // treats 'memory' as a page header, zeroes every field and returns it
+ {
+ xml_memory_page* result = static_cast<xml_memory_page*>(memory);
+
+ result->allocator = 0;
+ result->prev = 0;
+ result->next = 0;
+ result->busy_size = 0;
+ result->freed_size = 0;
+
+ return result;
+ }
+
+ xml_allocator* allocator; // allocator this page belongs to (what get_allocator returns)
+
+ xml_memory_page* prev; // neighbours in the allocator's doubly linked page list
+ xml_memory_page* next;
+
+ size_t busy_size; // bytes handed out from this page
+ size_t freed_size; // bytes given back through deallocate_memory; the page is recycled once this equals busy_size
+ };
+
+ struct xml_memory_string_header // placed immediately before the characters of every string returned by xml_allocator::allocate_string
+ {
+ uint16_t page_offset; // offset from page->data
+ uint16_t full_size; // 0 if string occupies whole page
+ };
+
+ struct xml_allocator // page-based allocator: carves objects and strings out of a linked list of xml_memory_page blocks
+ {
+ xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) // starts from an existing page and continues after whatever it already holds
+ {
+ }
+
+ xml_memory_page* allocate_page(size_t data_size) // obtains an aligned page with room for data_size bytes; returns 0 when the underlying allocation fails
+ {
+ size_t size = sizeof(xml_memory_page) + data_size;
+
+ // allocate block with some alignment, leaving memory for worst-case padding
+ void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
+ if (!memory) return 0;
+
+ // align to next page boundary (note: this guarantees at least 1 usable byte before the page)
+ char* page_memory = reinterpret_cast<char*>((reinterpret_cast<uintptr_t>(memory) + xml_memory_page_alignment) & ~(xml_memory_page_alignment - 1));
+
+ // prepare page structure
+ xml_memory_page* page = xml_memory_page::construct(page_memory);
+ assert(page);
+
+ page->allocator = _root->allocator;
+
+ // record the offset for freeing the memory block
+ assert(page_memory > memory && page_memory - static_cast<char*>(memory) <= 127);
+ page_memory[-1] = static_cast<char>(page_memory - static_cast<char*>(memory)); // the byte just before the page stores the distance back to the raw block
+
+ return page;
+ }
+
+ static void deallocate_page(xml_memory_page* page) // releases a page obtained from allocate_page
+ {
+ char* page_memory = reinterpret_cast<char*>(page);
+
+ xml_memory::deallocate(page_memory - page_memory[-1]); // step back by the offset recorded in allocate_page to reach the raw block
+ }
+
+ void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); // slow path used when the current page is full; defined after the struct
+
+ void* allocate_memory(size_t size, xml_memory_page*& out_page) // returns size bytes and reports the page they came from via out_page
+ {
+ if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
+
+ void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size; // next unused byte of the current page's data area
+
+ _busy_size += size;
+
+ out_page = _root;
+
+ return buf;
+ }
+
+ void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) // records size bytes of 'page' as freed and recycles the page once nothing on it is in use
+ {
+ if (page == _root) page->busy_size = _busy_size; // bring the current page's stored size in line with the running counter
+
+ assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
+ (void)!ptr; // presumably keeps ptr "used" when assert is compiled out
+
+ page->freed_size += size;
+ assert(page->freed_size <= page->busy_size);
+
+ if (page->freed_size == page->busy_size)
+ {
+ if (page->next == 0)
+ {
+ assert(_root == page);
+
+ // top page freed, just reset sizes
+ page->busy_size = page->freed_size = 0;
+ _busy_size = 0;
+ }
+ else
+ {
+ assert(_root != page);
+ assert(page->prev);
+
+ // remove from the list
+ page->prev->next = page->next;
+ page->next->prev = page->prev;
+
+ // deallocate
+ deallocate_page(page);
+ }
+ }
+ }
+
+ char_t* allocate_string(size_t length) // returns storage for length char_t units preceded by a hidden xml_memory_string_header, or 0 on allocation failure
+ {
+ PUGI__STATIC_ASSERT(xml_memory_page_size <= (1 << 16));
+
+ // allocate memory for string and header block
+ size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
+
+ // round size up to pointer alignment boundary
+ size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
+
+ xml_memory_page* page;
+ xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
+
+ if (!header) return 0;
+
+ // setup header
+ ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
+
+ assert(page_offset >= 0 && page_offset < (1 << 16));
+ header->page_offset = static_cast<uint16_t>(page_offset);
+
+ // full_size == 0 for large strings that occupy the whole page
+ assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
+ header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
+
+ // round-trip through void* to avoid 'cast increases required alignment of target type' warning
+ // header is guaranteed a pointer-sized alignment, which should be enough for char_t
+ return static_cast<char_t*>(static_cast<void*>(header + 1));
+ }
+
+ void deallocate_string(char_t* string) // releases a string obtained from allocate_string
+ {
+ // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
+ // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
+
+ // get header
+ xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
+ assert(header);
+
+ // deallocate
+ size_t page_offset = sizeof(xml_memory_page) + header->page_offset;
+ xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset)); // walk back from the header to the start of its page
+
+ // if full_size == 0 then this string occupies the whole page
+ size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
+
+ deallocate_memory(header, full_size, page);
+ }
+
+ xml_memory_page* _root; // page currently being filled; also the last page of the list
+ size_t _busy_size; // bytes used so far in _root
+ };
+
+ PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) // slow path of allocate_memory: gets a fresh page for the request; returns 0 (and sets out_page to 0) when no page can be allocated
+ {
+ const size_t large_allocation_threshold = xml_memory_page_size / 4; // requests above this get a dedicated page of exactly their size
+
+ xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
+ out_page = page;
+
+ if (!page) return 0;
+
+ if (size <= large_allocation_threshold)
+ {
+ _root->busy_size = _busy_size; // store the final fill level of the page being retired
+
+ // insert page at the end of linked list
+ page->prev = _root;
+ _root->next = page;
+ _root = page;
+
+ _busy_size = size;
+ }
+ else
+ {
+ // insert page before the end of linked list, so that it is deleted as soon as possible
+ // the last page is not deleted even if it's empty (see deallocate_memory)
+ assert(_root->prev);
+
+ page->prev = _root->prev;
+ page->next = _root;
+
+ _root->prev->next = page;
+ _root->prev = page;
+ }
+
+ // allocate inside page
+ page->busy_size = size;
+
+ return reinterpret_cast<char*>(page) + sizeof(xml_memory_page); // the request occupies the start of the new page's data area
+ }
+PUGI__NS_END
+
+namespace pugi
+{
+ /// A 'name=value' XML attribute structure.
+ struct xml_attribute_struct
+ {
+ /// Default ctor
+ xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
+ {
+ }
+
+ uintptr_t header; ///< Address of the owning memory page; the low bits are tested as flags (see destroy_attribute).
+
+ char_t* name; ///< Pointer to attribute name.
+ char_t* value; ///< Pointer to attribute value.
+
+ xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list)
+ xml_attribute_struct* next_attribute; ///< Next attribute
+ };
+
+ /// An XML document tree node.
+ struct xml_node_struct
+ {
+ /// Default ctor
+ /// \param type - node type
+ xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
+ {
+ }
+
+ uintptr_t header; ///< Address of the owning memory page combined with (type - 1) in the low bits.
+
+ xml_node_struct* parent; ///< Pointer to parent
+
+ char_t* name; ///< Pointer to element name.
+ char_t* value; ///< Pointer to any associated string data.
+
+ xml_node_struct* first_child; ///< First child
+
+ xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list)
+ xml_node_struct* next_sibling; ///< Right brother
+
+ xml_attribute_struct* first_attribute; ///< First attribute
+ };
+}
+
+PUGI__NS_BEGIN
+ struct xml_extra_buffer // singly linked list entry that points at one additional character buffer
+ {
+ char_t* buffer; // the buffer this entry refers to
+ xml_extra_buffer* next; // following entry
+ };
+
+ struct xml_document_struct: public xml_node_struct, public xml_allocator // document root: a node of type node_document that is also an xml_allocator (get_document casts a page's allocator pointer back to this type)
+ {
+ xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
+ {
+ }
+
+ const char_t* buffer; // starts out null
+
+ xml_extra_buffer* extra_buffers; // head of the xml_extra_buffer list; starts out empty
+ };
+
+ // Finds the allocator that owns a node: the node header carries the address of its
+ // memory page, and the page records its allocator
+ inline xml_allocator& get_allocator(const xml_node_struct* node)
+ {
+     assert(node);
+
+     xml_memory_page* page = reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask);
+
+     return *page->allocator;
+ }
+
+ // Finds the document an object belongs to: the page named in the object's header
+ // records its allocator, which is downcast to the document structure
+ template <typename Object> inline xml_document_struct& get_document(const Object* object)
+ {
+     assert(object);
+
+     xml_memory_page* page = reinterpret_cast<xml_memory_page*>(object->header & xml_memory_page_pointer_mask);
+
+     return *static_cast<xml_document_struct*>(page->allocator);
+ }
+PUGI__NS_END
+
+// Low-level DOM operations
+PUGI__NS_BEGIN
+ // Takes storage for one attribute from alloc and constructs the attribute in it.
+ // Returns 0 when the allocator cannot provide memory.
+ inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
+ {
+     xml_memory_page* page;
+     void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
+
+     // allocate_memory yields 0 when a new page could not be obtained; placement new must
+     // not be handed a null address, so stop before constructing anything
+     if (!memory) return 0;
+
+     return new (memory) xml_attribute_struct(page);
+ }
+
+ // Takes storage for one node from alloc and constructs a node of the given type in it.
+ // Returns 0 when the allocator cannot provide memory.
+ inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
+ {
+     xml_memory_page* page;
+     void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
+
+     // allocate_memory yields 0 when a new page could not be obtained; placement new must
+     // not be handed a null address, so stop before constructing anything
+     if (!memory) return 0;
+
+     return new (memory) xml_node_struct(page, type);
+ }
+
+ // Releases an attribute: first the name/value strings that the header flags mark as
+ // allocator-owned, then the attribute's own storage on its page
+ inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
+ {
+     const uintptr_t flags = a->header;
+     xml_memory_page* owner = reinterpret_cast<xml_memory_page*>(flags & xml_memory_page_pointer_mask);
+
+     if (flags & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name);
+     if (flags & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value);
+
+     alloc.deallocate_memory(a, sizeof(xml_attribute_struct), owner);
+ }
+
+ // Releases a node and everything beneath it: allocator-owned name/value strings,
+ // every attribute, every child subtree (recursively), and finally the node's own storage
+ inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
+ {
+     const uintptr_t flags = n->header;
+
+     if (flags & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name);
+     if (flags & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value);
+
+     xml_attribute_struct* attr = n->first_attribute;
+
+     while (attr)
+     {
+         // fetch the successor first: attr is gone once it has been destroyed
+         xml_attribute_struct* following = attr->next_attribute;
+
+         destroy_attribute(attr, alloc);
+
+         attr = following;
+     }
+
+     xml_node_struct* child = n->first_child;
+
+     while (child)
+     {
+         // same reasoning as for attributes
+         xml_node_struct* following = child->next_sibling;
+
+         destroy_node(child, alloc);
+
+         child = following;
+     }
+
+     alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(flags & xml_memory_page_pointer_mask));
+ }
+
+ // Links `child` as the last child of `node`. The head's prev_sibling_c always refers to
+ // the tail of the list, which is what gives constant-time access to the end.
+ inline void append_node(xml_node_struct* child, xml_node_struct* node)
+ {
+     child->parent = node;
+
+     xml_node_struct* first = node->first_child;
+
+     if (!first)
+     {
+         // empty list: child becomes the head and its own tail reference
+         node->first_child = child;
+         child->prev_sibling_c = child;
+         return;
+     }
+
+     xml_node_struct* last = first->prev_sibling_c;
+
+     last->next_sibling = child;
+     child->prev_sibling_c = last;
+     first->prev_sibling_c = child;
+ }
+
+ // Links `child` as the first child of `node`.
+ inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
+ {
+     child->parent = node;
+
+     xml_node_struct* old_first = node->first_child;
+
+     // the new head inherits the old head's tail reference, or refers to itself if the list was empty
+     child->prev_sibling_c = old_first ? old_first->prev_sibling_c : child;
+
+     if (old_first)
+         old_first->prev_sibling_c = child;
+
+     child->next_sibling = old_first;
+     node->first_child = child;
+ }
+
+ // Links `child` into the sibling list immediately after `node`, under node's parent.
+ inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
+ {
+     xml_node_struct* owner = node->parent;
+     xml_node_struct* successor = node->next_sibling;
+
+     child->parent = owner;
+
+     // the back-link to update is the successor's, or - when inserting at the end - the
+     // head's, since the head's prev_sibling_c refers to the tail
+     xml_node_struct* back_link_holder = successor ? successor : owner->first_child;
+     back_link_holder->prev_sibling_c = child;
+
+     child->next_sibling = successor;
+     child->prev_sibling_c = node;
+
+     node->next_sibling = child;
+ }
+
+ // Links `child` into the sibling list immediately before `node`, under node's parent.
+ inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
+ {
+     xml_node_struct* owner = node->parent;
+     xml_node_struct* predecessor = node->prev_sibling_c;
+
+     child->parent = owner;
+
+     // a predecessor with a null next_sibling is the tail, i.e. `node` is currently the head
+     if (predecessor->next_sibling)
+         predecessor->next_sibling = child;
+     else
+         owner->first_child = child;
+
+     child->prev_sibling_c = predecessor;
+     child->next_sibling = node;
+
+     node->prev_sibling_c = child;
+ }
+
+ // Unlinks `node` from its parent's child list and clears its parent/sibling links.
+ // The node itself is not destroyed.
+ inline void remove_node(xml_node_struct* node)
+ {
+     xml_node_struct* owner = node->parent;
+     xml_node_struct* before = node->prev_sibling_c;
+     xml_node_struct* after = node->next_sibling;
+
+     // fix the back-link: the successor's, or the head's tail reference when removing the tail
+     if (after)
+         after->prev_sibling_c = before;
+     else
+         owner->first_child->prev_sibling_c = before;
+
+     // fix the forward link: `before` is the tail (null next_sibling) exactly when node is the head
+     if (before->next_sibling)
+         before->next_sibling = after;
+     else
+         owner->first_child = after;
+
+     node->parent = 0;
+     node->prev_sibling_c = 0;
+     node->next_sibling = 0;
+ }
+
+ // Links `attr` as the last attribute of `node`. The first attribute's prev_attribute_c
+ // always refers to the last attribute of the list.
+ inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
+ {
+     xml_attribute_struct* first = node->first_attribute;
+
+     if (!first)
+     {
+         // empty list: attr becomes the head and its own tail reference
+         node->first_attribute = attr;
+         attr->prev_attribute_c = attr;
+         return;
+     }
+
+     xml_attribute_struct* last = first->prev_attribute_c;
+
+     last->next_attribute = attr;
+     attr->prev_attribute_c = last;
+     first->prev_attribute_c = attr;
+ }
+
+ // Links `attr` as the first attribute of `node`.
+ inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
+ {
+     xml_attribute_struct* old_first = node->first_attribute;
+
+     // the new head inherits the old head's tail reference, or refers to itself if the list was empty
+     attr->prev_attribute_c = old_first ? old_first->prev_attribute_c : attr;
+
+     if (old_first)
+         old_first->prev_attribute_c = attr;
+
+     attr->next_attribute = old_first;
+     node->first_attribute = attr;
+ }
+
+ // Links `attr` into node's attribute list immediately after `place`.
+ inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
+ {
+     xml_attribute_struct* successor = place->next_attribute;
+
+     // when inserting at the end, the head's tail reference is the back-link to update
+     xml_attribute_struct* back_link_holder = successor ? successor : node->first_attribute;
+     back_link_holder->prev_attribute_c = attr;
+
+     attr->next_attribute = successor;
+     attr->prev_attribute_c = place;
+     place->next_attribute = attr;
+ }
+
+ // Links `attr` into node's attribute list immediately before `place`.
+ inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
+ {
+     xml_attribute_struct* predecessor = place->prev_attribute_c;
+
+     // a predecessor with a null next_attribute is the tail, i.e. `place` is currently the head
+     if (predecessor->next_attribute)
+         predecessor->next_attribute = attr;
+     else
+         node->first_attribute = attr;
+
+     attr->prev_attribute_c = predecessor;
+     attr->next_attribute = place;
+     place->prev_attribute_c = attr;
+ }
+
+ // Unlinks `attr` from node's attribute list and clears its links.
+ // The attribute itself is not destroyed.
+ inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
+ {
+     xml_attribute_struct* before = attr->prev_attribute_c;
+     xml_attribute_struct* after = attr->next_attribute;
+
+     // fix the back-link: the successor's, or the head's tail reference when removing the tail
+     if (after)
+         after->prev_attribute_c = before;
+     else
+         node->first_attribute->prev_attribute_c = before;
+
+     // fix the forward link: `before` is the tail (null next_attribute) exactly when attr is the head
+     if (before->next_attribute)
+         before->next_attribute = after;
+     else
+         node->first_attribute = after;
+
+     attr->prev_attribute_c = 0;
+     attr->next_attribute = 0;
+ }
+
+ // Allocates a node of the requested type and appends it to node's children.
+ // Returns the new node, or 0 when allocation fails (nothing is linked in that case).
+ PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
+ {
+     xml_node_struct* created = allocate_node(alloc, type);
+
+     if (created)
+         append_node(created, node);
+
+     return created;
+ }
+
+ // Allocates an attribute and appends it to node's attribute list.
+ // Returns the new attribute, or 0 when allocation fails (nothing is linked in that case).
+ PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
+ {
+     xml_attribute_struct* created = allocate_attribute(alloc);
+
+     if (created)
+         append_attribute(created, node);
+
+     return created;
+ }
+PUGI__NS_END
+
+// Helper classes for code generation
+PUGI__NS_BEGIN
+ // Compile-time boolean tags: `value` is an integral constant that templates can test.
+ struct opt_false { enum { value = 0 }; };
+
+ struct opt_true { enum { value = 1 }; };
+PUGI__NS_END
+
+// Unicode utilities
+PUGI__NS_BEGIN
+ // Returns `value` with its two bytes exchanged.
+ inline uint16_t endian_swap(uint16_t value)
+ {
+     const unsigned int low_byte = value & 0xff;
+     const unsigned int high_byte = value >> 8;
+
+     return static_cast<uint16_t>((low_byte << 8) | high_byte);
+ }
+
+ // Returns `value` with its four bytes in reversed order.
+ inline uint32_t endian_swap(uint32_t value)
+ {
+     const uint32_t byte0 = value & 0xff;
+     const uint32_t byte1 = (value >> 8) & 0xff;
+     const uint32_t byte2 = (value >> 16) & 0xff;
+     const uint32_t byte3 = value >> 24;
+
+     return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
+ }
+
+ // Decoder sink that accumulates the number of UTF-8 bytes needed for the code points it is fed.
+ struct utf8_counter
+ {
+     typedef size_t value_type;
+
+     // Code points below 0x10000: 1 byte (< 0x80), 2 bytes (< 0x800), otherwise 3 bytes.
+     static value_type low(value_type result, uint32_t ch)
+     {
+         const size_t width = (ch < 0x80) ? 1 : (ch < 0x800) ? 2 : 3;
+
+         return result + width;
+     }
+
+     // U+10000..U+10FFFF always take 4 bytes.
+     static value_type high(value_type result, uint32_t)
+     {
+         return result + 4;
+     }
+ };
+
+ // Decoder sink that writes code points as UTF-8 bytes and returns the advanced output pointer.
+ struct utf8_writer
+ {
+     typedef uint8_t* value_type;
+
+     // Encodes a code point below 0x10000 using 1, 2 or 3 bytes.
+     static value_type low(value_type result, uint32_t ch)
+     {
+         // U+0000..U+007F
+         if (ch < 0x80)
+         {
+             result[0] = static_cast<uint8_t>(ch);
+             return result + 1;
+         }
+
+         // U+0080..U+07FF
+         if (ch < 0x800)
+         {
+             result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
+             result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+             return result + 2;
+         }
+
+         // U+0800..U+FFFF
+         result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
+         result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
+         result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+         return result + 3;
+     }
+
+     // Encodes U+10000..U+10FFFF using 4 bytes.
+     static value_type high(value_type result, uint32_t ch)
+     {
+         result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
+         result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
+         result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
+         result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+         return result + 4;
+     }
+
+     // Dispatches to low() or high() depending on the code point's range.
+     static value_type any(value_type result, uint32_t ch)
+     {
+         if (ch < 0x10000)
+             return low(result, ch);
+
+         return high(result, ch);
+     }
+ };
+
+ // Decoder sink that accumulates the number of UTF-16 units: one per code point below
+ // 0x10000, two (a surrogate pair) per code point above.
+ struct utf16_counter
+ {
+     typedef size_t value_type;
+
+     static value_type low(value_type result, uint32_t) { return result + 1; }
+
+     static value_type high(value_type result, uint32_t) { return result + 2; }
+ };
+
+ // Decoder sink that writes code points as UTF-16 units and returns the advanced output pointer.
+ struct utf16_writer
+ {
+     typedef uint16_t* value_type;
+
+     // Code point below 0x10000: stored as a single unit.
+     static value_type low(value_type result, uint32_t ch)
+     {
+         result[0] = static_cast<uint16_t>(ch);
+
+         return result + 1;
+     }
+
+     // Code point at or above 0x10000: stored as a lead/trail surrogate pair.
+     static value_type high(value_type result, uint32_t ch)
+     {
+         const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);
+
+         result[0] = static_cast<uint16_t>(0xD800 + (offset >> 10));
+         result[1] = static_cast<uint16_t>(0xDC00 + (offset & 0x3ff));
+
+         return result + 2;
+     }
+
+     // Dispatches to low() or high() depending on the code point's range.
+     static value_type any(value_type result, uint32_t ch)
+     {
+         if (ch < 0x10000)
+             return low(result, ch);
+
+         return high(result, ch);
+     }
+ };
+
+ // Decoder sink that accumulates the number of UTF-32 units: exactly one per code point.
+ struct utf32_counter
+ {
+     typedef size_t value_type;
+
+     static value_type low(value_type result, uint32_t) { return result + 1; }
+
+     static value_type high(value_type result, uint32_t) { return result + 1; }
+ };
+
+ // Decoder sink that stores each code point as one UTF-32 unit and returns the advanced
+ // output pointer. All three entry points behave identically.
+ struct utf32_writer
+ {
+     typedef uint32_t* value_type;
+
+     static value_type low(value_type result, uint32_t ch)
+     {
+         result[0] = ch;
+
+         return result + 1;
+     }
+
+     static value_type high(value_type result, uint32_t ch)
+     {
+         return low(result, ch);
+     }
+
+     static value_type any(value_type result, uint32_t ch)
+     {
+         return low(result, ch);
+     }
+ };
+
+ // Decoder sink that writes one byte per code point; anything above 255 is replaced with '?'.
+ struct latin1_writer
+ {
+     typedef uint8_t* value_type;
+
+     static value_type low(value_type result, uint32_t ch)
+     {
+         const uint32_t mapped = (ch <= 255) ? ch : static_cast<uint32_t>('?');
+
+         result[0] = static_cast<uint8_t>(mapped);
+
+         return result + 1;
+     }
+
+     // Code points handed to high() are always written as '?'.
+     static value_type high(value_type result, uint32_t ch)
+     {
+         (void)ch;
+
+         result[0] = '?';
+
+         return result + 1;
+     }
+ };
+
+ // Picks the code unit type and the counter/writer sinks matching a given wchar_t size.
+ // Only sizes 2 (UTF-16 sinks) and 4 (UTF-32 sinks) are specialized.
+ template <size_t size> struct wchar_selector;
+
+ template <> struct wchar_selector<2>
+ {
+     typedef utf16_counter counter;
+     typedef utf16_writer writer;
+     typedef uint16_t type;
+ };
+
+ template <> struct wchar_selector<4>
+ {
+     typedef utf32_counter counter;
+     typedef utf32_writer writer;
+     typedef uint32_t type;
+ };
+
+ // Sinks for the platform's actual wchar_t width.
+ typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
+ typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
+
+ template <typename Traits, typename opt_swap = opt_false> struct utf_decoder // decodes input blocks and feeds each code point to Traits::low (< 0x10000) or Traits::high; opt_swap::value selects byte-swapping of UTF-16/UTF-32 input units
+ {
+ static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result) // data: `size` bytes of UTF-8; result is threaded through Traits and returned
+ {
+ const uint8_t utf8_byte_mask = 0x3f; // payload bits of a continuation byte
+
+ while (size)
+ {
+ uint8_t lead = *data;
+
+ // 0xxxxxxx -> U+0000..U+007F
+ if (lead < 0x80)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ size -= 1;
+
+ // process aligned single-byte (ascii) blocks: once data is 4-byte aligned, test four bytes at a time for a set high bit
+ if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
+ {
+ // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+ while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
+ {
+ result = Traits::low(result, data[0]);
+ result = Traits::low(result, data[1]);
+ result = Traits::low(result, data[2]);
+ result = Traits::low(result, data[3]);
+ data += 4;
+ size -= 4;
+ }
+ }
+ }
+ // 110xxxxx -> U+0080..U+07FF (the unsigned cast turns the subtraction into one 0xC0..0xDF range check; one continuation byte must follow)
+ else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
+ {
+ result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
+ data += 2;
+ size -= 2;
+ }
+ // 1110xxxx -> U+0800-U+FFFF (two continuation bytes must follow)
+ else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
+ {
+ result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
+ data += 3;
+ size -= 3;
+ }
+ // 11110xxx -> U+10000..U+10FFFF (three continuation bytes must follow)
+ else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
+ {
+ result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
+ data += 4;
+ size -= 4;
+ }
+ // 10xxxxxx or 11111xxx -> invalid; also reached for truncated sequences or bad continuation bytes - one byte is skipped and nothing is emitted
+ else
+ {
+ data += 1;
+ size -= 1;
+ }
+ }
+
+ return result;
+ }
+
+ static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result) // data: `size` UTF-16 units, byte-swapped on read when opt_swap::value is set
+ {
+ const uint16_t* end = data + size;
+
+ while (data < end)
+ {
+ unsigned int lead = opt_swap::value ? endian_swap(*data) : *data;
+
+ // U+0000..U+D7FF
+ if (lead < 0xD800)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ }
+ // U+E000..U+FFFF
+ else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ }
+ // surrogate pair lead (0xD800..0xDBFF) with at least one more unit available
+ else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)
+ {
+ uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
+
+ if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
+ {
+ result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
+ data += 2;
+ }
+ else
+ {
+ data += 1; // lead surrogate not followed by a trail surrogate: skip the lead only
+ }
+ }
+ else
+ {
+ data += 1; // stray trail surrogate, or lead surrogate in the last unit: skipped
+ }
+ }
+
+ return result;
+ }
+
+ static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result) // data: `size` UTF-32 units, byte-swapped on read when opt_swap::value is set; values are not range-checked
+ {
+ const uint32_t* end = data + size;
+
+ while (data < end)
+ {
+ uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
+
+ // U+0000..U+FFFF
+ if (lead < 0x10000)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ }
+ // U+10000..U+10FFFF
+ else
+ {
+ result = Traits::high(result, lead);
+ data += 1;
+ }
+ }
+
+ return result;
+ }
+
+ static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result) // every input byte is passed to Traits::low as a code point
+ {
+ for (size_t i = 0; i < size; ++i)
+ {
+ result = Traits::low(result, data[i]);
+ }
+
+ return result;
+ }
+
+ static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result) // overload chosen when wchar_t maps to a 16-bit unit
+ {
+ return decode_utf16_block(data, size, result);
+ }
+
+ static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result) // overload chosen when wchar_t maps to a 32-bit unit
+ {
+ return decode_utf32_block(data, size, result);
+ }
+
+ static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result) // reinterprets wchar_t data as the unit type from wchar_selector and dispatches by overload
+ {
+ return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);
+ }
+ };
+
+ // Writes the byte-swapped form of each of the `length` units in `data` to `result`.
+ template <typename T> PUGI__FN void convert_utf_endian_swap(T* result, const T* data, size_t length)
+ {
+     size_t index = 0;
+
+     while (index < length)
+     {
+         result[index] = endian_swap(data[index]);
+         ++index;
+     }
+ }
+
+#ifdef PUGIXML_WCHAR_MODE
+ // Writes the byte-swapped form of each of the `length` wchar_t units in `data` to `result`,
+ // swapping through the fixed-width unit type that matches sizeof(wchar_t).
+ PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
+ {
+     typedef wchar_selector<sizeof(wchar_t)>::type unit_type;
+
+     size_t index = 0;
+
+     while (index < length)
+     {
+         const unit_type unit = static_cast<unit_type>(data[index]);
+
+         result[index] = static_cast<wchar_t>(endian_swap(unit));
+         ++index;
+     }
+ }
+#endif
+PUGI__NS_END
+
+PUGI__NS_BEGIN
+ enum chartype_t // single-bit flags; chartype_table entries are bitwise ORs of these
+ {
+ ct_parse_pcdata = 1, // \0, &, \r, <
+ ct_parse_attr = 2, // \0, &, \r, ', "
+ ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
+ ct_space = 8, // \r, \n, space, tab
+ ct_parse_cdata = 16, // \0, ], >, \r
+ ct_parse_comment = 32, // \0, -, >, \r
+ ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
+ ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
+ };
+
+ static const unsigned char chartype_table[256] = // flag set per character code; queried through PUGI__IS_CHARTYPE
+ {
+ 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
+ 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
+ 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
+ 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
+
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+ (all entries are ct_symbol | ct_start_symbol)
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
+ };
+
+ enum chartypex_t // single-bit flags; chartypex_table entries are bitwise ORs of these
+ {
+ ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
+ ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
+ ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
+ ctx_digit = 8, // 0-9
+ ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
+ };
+
+ static const unsigned char chartypex_table[256] = // flag set per character code; queried through PUGI__IS_CHARTYPEX
+ {
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
+ 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
+
+ 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
+ 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
+
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+ (all entries are ctx_start_symbol | ctx_symbol)
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+ };
+
+#ifdef PUGIXML_WCHAR_MODE // wide mode: units >= 128 all share table[128]; smaller units index the table directly
+ #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
+#else // char mode: the unit is converted to unsigned char and used as the table index
+ #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
+#endif
+
+ #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
+ #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
+
+ // Reports whether the least significant byte of an unsigned int is stored first in memory.
+ PUGI__FN bool is_little_endian()
+ {
+     unsigned int probe = 1;
+     const unsigned char* first_byte = reinterpret_cast<unsigned char*>(&probe);
+
+     return *first_byte == 1;
+ }
+
+ // Returns the concrete encoding that wchar_t corresponds to on this platform:
+ // UTF-16 for 2-byte wchar_t, UTF-32 for 4-byte wchar_t, in the machine's byte order.
+ PUGI__FN xml_encoding get_wchar_encoding()
+ {
+     PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
+
+     const bool little = is_little_endian();
+
+     if (sizeof(wchar_t) == 2)
+         return little ? encoding_utf16_le : encoding_utf16_be;
+
+     return little ? encoding_utf32_le : encoding_utf32_be;
+ }
+
+ // Guesses the encoding of a buffer from its first four bytes, looking for a byte order
+ // mark or for '<' / '<?' as it would appear in UTF-16/UTF-32. Everything that is not
+ // recognized (including the UTF-8 BOM EF BB BF and a plain '<?xm' prefix) is UTF-8.
+ PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
+ {
+     switch (d0)
+     {
+     case 0x00:
+         // 00 00 FE FF (BOM) or 00 00 00 3C ('<') -> UTF-32 BE
+         if (d1 == 0 && ((d2 == 0xfe && d3 == 0xff) || (d2 == 0 && d3 == 0x3c))) return encoding_utf32_be;
+
+         // 00 3C .. .. -> '<' in UTF-16 BE (this may fail, but is better than utf8 since it's zero terminated so early)
+         if (d1 == 0x3c) return encoding_utf16_be;
+
+         break;
+
+     case 0xff:
+         // FF FE 00 00 -> UTF-32 LE BOM; any other FF FE -> UTF-16 LE BOM
+         if (d1 == 0xfe) return (d2 == 0 && d3 == 0) ? encoding_utf32_le : encoding_utf16_le;
+
+         break;
+
+     case 0xfe:
+         // FE FF -> UTF-16 BE BOM
+         if (d1 == 0xff) return encoding_utf16_be;
+
+         break;
+
+     case 0x3c:
+         // 3C 00 00 00 -> '<' in UTF-32 LE; any other 3C 00 -> '<' in UTF-16 LE
+         if (d1 == 0) return (d2 == 0 && d3 == 0) ? encoding_utf32_le : encoding_utf16_le;
+
+         break;
+
+     default:
+         break;
+     }
+
+     // no known BOM or wide '<' detected, assume utf8
+     return encoding_utf8;
+ }
+
+ // Resolves a requested encoding to a concrete one: wchar/utf16/utf32 are replaced with
+ // their endian-specific forms, explicit encodings pass through unchanged, and
+ // encoding_auto is resolved by inspecting the first four bytes of the buffer.
+ PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
+ {
+     switch (encoding)
+     {
+     case encoding_wchar:
+         return get_wchar_encoding();
+
+     case encoding_utf16:
+         return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+     case encoding_utf32:
+         return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+     case encoding_auto:
+         // autodetection happens below
+         break;
+
+     default:
+         // an explicit encoding was requested, use it as is
+         return encoding;
+     }
+
+     // too little data to inspect
+     if (size < 4) return encoding_utf8;
+
+     // guess from the leading bytes (based on XML specification, Appendix F.1)
+     const uint8_t* bytes = static_cast<const uint8_t*>(contents);
+
+     PUGI__DMC_VOLATILE uint8_t d0 = bytes[0];
+     PUGI__DMC_VOLATILE uint8_t d1 = bytes[1];
+     PUGI__DMC_VOLATILE uint8_t d2 = bytes[2];
+     PUGI__DMC_VOLATILE uint8_t d3 = bytes[3];
+
+     return guess_buffer_encoding(d0, d1, d2, d3);
+ }
+
+ // Produces a writable char_t buffer for contents that need no conversion.
+ // Mutable input is used in place (out_length = number of char_t units); otherwise a
+ // zero-terminated copy is allocated and out_length includes the terminator.
+ // Returns false only when the allocation fails.
+ PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+ {
+     const size_t unit_count = size / sizeof(char_t);
+
+     if (is_mutable)
+     {
+         out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
+         out_length = unit_count;
+
+         return true;
+     }
+
+     char_t* copy = static_cast<char_t*>(xml_memory::allocate((unit_count + 1) * sizeof(char_t)));
+     if (!copy) return false;
+
+     if (!contents)
+     {
+         // a null source is only acceptable for an empty buffer
+         assert(unit_count == 0);
+     }
+     else
+     {
+         memcpy(copy, contents, unit_count * sizeof(char_t));
+     }
+
+     copy[unit_count] = 0;
+
+     out_buffer = copy;
+     out_length = unit_count + 1;
+
+     return true;
+ }
+
+#ifdef PUGIXML_WCHAR_MODE
+ // True when `le` and `re` are the two opposite-endian variants of the same encoding
+ // family (both UTF-16 or both UTF-32).
+ PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
+ {
+     if (le == re) return false;
+
+     const bool both_utf16 = (le == encoding_utf16_be || le == encoding_utf16_le) && (re == encoding_utf16_be || re == encoding_utf16_le);
+     const bool both_utf32 = (le == encoding_utf32_be || le == encoding_utf32_le) && (re == encoding_utf32_be || re == encoding_utf32_le);
+
+     return both_utf16 || both_utf32;
+ }
+
+ // Produces a native-endian char_t buffer from opposite-endian input.
+ // Mutable input is swapped in place (out_length = number of units); otherwise a swapped,
+ // zero-terminated copy is allocated and out_length includes the terminator.
+ // Returns false only when the allocation fails.
+ PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+ {
+     const char_t* source = static_cast<const char_t*>(contents);
+     const size_t unit_count = size / sizeof(char_t);
+
+     if (is_mutable)
+     {
+         char_t* in_place = const_cast<char_t*>(source);
+
+         convert_wchar_endian_swap(in_place, source, unit_count);
+
+         out_buffer = in_place;
+         out_length = unit_count;
+
+         return true;
+     }
+
+     char_t* copy = static_cast<char_t*>(xml_memory::allocate((unit_count + 1) * sizeof(char_t)));
+     if (!copy) return false;
+
+     convert_wchar_endian_swap(copy, source, unit_count);
+     copy[unit_count] = 0;
+
+     out_buffer = copy;
+     out_length = unit_count + 1;
+
+     return true;
+ }
+
+ // Converts a UTF-8 byte buffer into a newly allocated, zero-terminated wchar_t buffer.
+ // out_length counts the terminator; returns false if the allocation fails.
+ PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
+ {
+     const uint8_t* input = static_cast<const uint8_t*>(contents);
+
+     // counting pass: number of wchar_t units the decoded text occupies
+     const size_t unit_count = utf_decoder<wchar_counter>::decode_utf8_block(input, size, 0);
+
+     char_t* storage = static_cast<char_t*>(xml_memory::allocate((unit_count + 1) * sizeof(char_t)));
+     if (!storage) return false;
+
+     // writing pass: decode again, this time emitting units into the new buffer
+     wchar_writer::value_type out_first = reinterpret_cast<wchar_writer::value_type>(storage);
+     wchar_writer::value_type out_last = utf_decoder<wchar_writer>::decode_utf8_block(input, size, out_first);
+
+     assert(out_last == out_first + unit_count);
+     *out_last = 0;
+
+     out_buffer = storage;
+     out_length = unit_count + 1;
+
+     return true;
+ }
+
+ // Converts a UTF-16 buffer (byte-swapped on read when opt_swap::value is set) into a newly
+ // allocated, zero-terminated wchar_t buffer. out_length counts the terminator; returns
+ // false if the allocation fails.
+ template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+ {
+     const uint16_t* input = static_cast<const uint16_t*>(contents);
+     const size_t input_units = size / sizeof(uint16_t);
+
+     // counting pass: number of wchar_t units the decoded text occupies
+     const size_t unit_count = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(input, input_units, 0);
+
+     char_t* storage = static_cast<char_t*>(xml_memory::allocate((unit_count + 1) * sizeof(char_t)));
+     if (!storage) return false;
+
+     // writing pass: decode again, this time emitting units into the new buffer
+     wchar_writer::value_type out_first = reinterpret_cast<wchar_writer::value_type>(storage);
+     wchar_writer::value_type out_last = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(input, input_units, out_first);
+
+     assert(out_last == out_first + unit_count);
+     *out_last = 0;
+
+     out_buffer = storage;
+     out_length = unit_count + 1;
+
+     return true;
+ }
+
+ // Converts a UTF-32 buffer (byte-swapped on read when opt_swap::value is set) into a newly
+ // allocated, zero-terminated wchar_t buffer. out_length counts the terminator; returns
+ // false if the allocation fails.
+ template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+ {
+     const uint32_t* input = static_cast<const uint32_t*>(contents);
+     const size_t input_units = size / sizeof(uint32_t);
+
+     // counting pass: number of wchar_t units the decoded text occupies
+     const size_t unit_count = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(input, input_units, 0);
+
+     char_t* storage = static_cast<char_t*>(xml_memory::allocate((unit_count + 1) * sizeof(char_t)));
+     if (!storage) return false;
+
+     // writing pass: decode again, this time emitting units into the new buffer
+     wchar_writer::value_type out_first = reinterpret_cast<wchar_writer::value_type>(storage);
+     wchar_writer::value_type out_last = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(input, input_units, out_first);
+
+     assert(out_last == out_first + unit_count);
+     *out_last = 0;
+
+     out_buffer = storage;
+     out_length = unit_count + 1;
+
+     return true;
+ }
+
+ // Converts a Latin-1 byte buffer into a newly allocated, zero-terminated wchar_t buffer.
+ // Each input byte becomes exactly one output unit, so no counting pass is needed.
+ // out_length counts the terminator; returns false if the allocation fails.
+ PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
+ {
+     const uint8_t* input = static_cast<const uint8_t*>(contents);
+
+     // one wchar_t unit per input byte
+     const size_t unit_count = size;
+
+     char_t* storage = static_cast<char_t*>(xml_memory::allocate((unit_count + 1) * sizeof(char_t)));
+     if (!storage) return false;
+
+     wchar_writer::value_type out_first = reinterpret_cast<wchar_writer::value_type>(storage);
+     wchar_writer::value_type out_last = utf_decoder<wchar_writer>::decode_latin1_block(input, size, out_first);
+
+     assert(out_last == out_first + unit_count);
+     *out_last = 0;
+
+     out_buffer = storage;
+     out_length = unit_count + 1;
+
+     return true;
+ }
+
+ // Wide-character build: converts `contents` from `encoding` to the native wchar_t
+ // encoding by dispatching to the matching helper. Asserts and returns false for an
+ // encoding that none of the branches handle.
+ PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+ {
+     const xml_encoding native_wchar = get_wchar_encoding();
+
+     // already in the native encoding: no conversion required
+     if (encoding == native_wchar)
+         return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+     // same encoding family, opposite byte order: swapping is enough
+     if (need_endian_swap_utf(encoding, native_wchar))
+         return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
+
+     if (encoding == encoding_utf8)
+         return convert_buffer_utf8(out_buffer, out_length, contents, size);
+
+     if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+     {
+         const xml_encoding native_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+         // input in non-native byte order is swapped while decoding
+         if (encoding == native_utf16)
+             return convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false());
+
+         return convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
+     }
+
+     if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+     {
+         const xml_encoding native_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+         if (encoding == native_utf32)
+             return convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false());
+
+         return convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
+     }
+
+     if (encoding == encoding_latin1)
+         return convert_buffer_latin1(out_buffer, out_length, contents, size);
+
+     assert(!"Invalid encoding");
+     return false;
+ }
+#else
+ // Converts a UTF-16 buffer (byte-swapped on read when opt_swap::value is set) into a newly
+ // allocated, zero-terminated UTF-8 buffer. out_length counts the terminator; returns
+ // false if the allocation fails.
+ template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+ {
+     const uint16_t* input = static_cast<const uint16_t*>(contents);
+     const size_t input_units = size / sizeof(uint16_t);
+
+     // counting pass: number of UTF-8 bytes the decoded text occupies
+     const size_t byte_count = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(input, input_units, 0);
+
+     char_t* storage = static_cast<char_t*>(xml_memory::allocate((byte_count + 1) * sizeof(char_t)));
+     if (!storage) return false;
+
+     // writing pass: decode again, this time emitting UTF-8 into the new buffer
+     uint8_t* out_first = reinterpret_cast<uint8_t*>(storage);
+     uint8_t* out_last = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(input, input_units, out_first);
+
+     assert(out_last == out_first + byte_count);
+     *out_last = 0;
+
+     out_buffer = storage;
+     out_length = byte_count + 1;
+
+     return true;
+ }
+
+ // Converts a UTF-32 buffer (byte-swapped on read when opt_swap::value is set) into a newly
+ // allocated, zero-terminated UTF-8 buffer. out_length counts the terminator; returns
+ // false if the allocation fails.
+ template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+ {
+     const uint32_t* input = static_cast<const uint32_t*>(contents);
+     const size_t input_units = size / sizeof(uint32_t);
+
+     // counting pass: number of UTF-8 bytes the decoded text occupies
+     const size_t byte_count = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(input, input_units, 0);
+
+     char_t* storage = static_cast<char_t*>(xml_memory::allocate((byte_count + 1) * sizeof(char_t)));
+     if (!storage) return false;
+
+     // writing pass: decode again, this time emitting UTF-8 into the new buffer
+     uint8_t* out_first = reinterpret_cast<uint8_t*>(storage);
+     uint8_t* out_last = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(input, input_units, out_first);
+
+     assert(out_last == out_first + byte_count);
+     *out_last = 0;
+
+     out_buffer = storage;
+     out_length = byte_count + 1;
+
+     return true;
+ }
+
+ // Returns the number of leading bytes in `data` whose value is at most 127, i.e. the
+ // index of the first byte above 127, or `size` if there is none.
+ PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
+ {
+     size_t prefix = 0;
+
+     while (prefix < size && data[prefix] <= 127)
+         ++prefix;
+
+     return prefix;
+ }
+
+ // Converts a Latin-1 byte buffer to UTF-8. Input consisting only of bytes <= 127 is
+ // handed to get_mutable_buffer unchanged; otherwise a new zero-terminated buffer is
+ // allocated, the 7-bit prefix is copied verbatim and the remainder is transcoded.
+ // out_length counts the terminator in the converted case; returns false if allocation fails.
+ PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+ {
+     const uint8_t* input = static_cast<const uint8_t*>(contents);
+
+     // leading part that is byte-for-byte identical in UTF-8
+     const size_t ascii_length = get_latin1_7bit_prefix_length(input, size);
+     assert(ascii_length <= size);
+
+     const uint8_t* tail = input + ascii_length;
+     const size_t tail_length = size - ascii_length;
+
+     // nothing above 127: the original bytes can be used as they are
+     if (tail_length == 0)
+         return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+     // counting pass: UTF-8 size of the whole text
+     const size_t byte_count = ascii_length + utf_decoder<utf8_counter>::decode_latin1_block(tail, tail_length, 0);
+
+     char_t* storage = static_cast<char_t*>(xml_memory::allocate((byte_count + 1) * sizeof(char_t)));
+     if (!storage) return false;
+
+     // writing pass: copy the prefix, then transcode the tail right after it
+     memcpy(storage, input, ascii_length);
+
+     uint8_t* out_first = reinterpret_cast<uint8_t*>(storage);
+     uint8_t* out_last = utf_decoder<utf8_writer>::decode_latin1_block(tail, tail_length, out_first + ascii_length);
+
+     assert(out_last == out_first + byte_count);
+     *out_last = 0;
+
+     out_buffer = storage;
+     out_length = byte_count + 1;
+
+     return true;
+ }
+
+ // char build: converts `contents` from `encoding` to UTF-8 by dispatching to the
+ // matching helper. Asserts and returns false for an encoding that none of the
+ // branches handle.
+ PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+ {
+     // already UTF-8: no conversion required
+     if (encoding == encoding_utf8)
+         return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+     if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+     {
+         const xml_encoding native_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+         // input in non-native byte order is swapped while decoding
+         if (encoding == native_utf16)
+             return convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false());
+
+         return convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
+     }
+
+     if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+     {
+         const xml_encoding native_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+         if (encoding == native_utf32)
+             return convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false());
+
+         return convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
+     }
+
+     if (encoding == encoding_latin1)
+         return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
+
+     assert(!"Invalid encoding");
+     return false;
+ }
+#endif
+
+ // First half of wide -> UTF-8 conversion: returns the number of UTF-8 bytes needed to
+ // represent the `length` wchar_t units in `str` (terminator not included).
+ PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
+ {
+     const size_t utf8_size = utf_decoder<utf8_counter>::decode_wchar_block(str, length, 0);
+
+     return utf8_size;
+ }
+
+ // Second pass of wide -> utf8 conversion: writes the utf8 form of str into buffer and zero-terminates it.
+ // size is the value previously returned by as_utf8_begin; buffer[size] is written, so it must be addressable.
+ PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
+ {
+ uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
+
+ // emit utf8 bytes; the writer hands back the position after the last byte written
+ uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(str, length, begin);
+
+ // the writing pass must produce exactly as many bytes as the counting pass predicted
+ assert(begin + size == end);
+ (void)!end;
+
+ // terminator goes right after the converted data
+ buffer[size] = 0;
+ }
+
+#ifndef PUGIXML_NO_STL
+ // Converts a wide string of the given length to a utf8 std::string using the two-pass begin/end helpers.
+ PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
+ {
+ // pass 1: measure the utf8 output
+ const size_t utf8_size = as_utf8_begin(str, length);
+
+ // storage for the result, zero-filled until pass 2 overwrites it
+ std::string converted(utf8_size, '\0');
+
+ // pass 2: fill the storage (skipped for empty output so &converted[0] is never taken on an empty string)
+ if (utf8_size != 0)
+ {
+ as_utf8_end(&converted[0], utf8_size, str, length);
+ }
+
+ return converted;
+ }
+
+ // Converts size bytes of utf8 text to a wide string: one counting pass, then one writing pass.
+ PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
+ {
+ const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
+
+ // pass 1: number of wchar_t units required
+ size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
+
+ // storage for the result
+ std::basic_string<wchar_t> result(length, L'\0');
+
+ // nothing to write for empty output
+ if (length == 0) return result;
+
+ // pass 2: decode straight into the string's storage
+ wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
+ wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);
+
+ // both passes must agree on the output length
+ assert(begin + length == end);
+ (void)!end;
+
+ return result;
+ }
+#endif
+
+ // Decides whether a string of the given length may be written over the existing target string in place.
+ // header/header_mask tell whether target was allocated separately (mask bit set) or not (bit clear).
+ inline bool strcpy_insitu_allow(size_t length, uintptr_t header, uintptr_t header_mask, char_t* target)
+ {
+ // shared page contents must never be overwritten
+ if (header & xml_memory_page_contents_shared_mask) return false;
+
+ const size_t capacity = strlength(target);
+
+ // the new string has to fit into the old one in every case
+ if (capacity < length) return false;
+
+ // target not separately allocated (document buffer memory): fitting is the only requirement
+ if ((header & header_mask) == 0) return true;
+
+ // separately allocated target: reuse only when little space would be wasted
+ const size_t reuse_threshold = 32;
+ const size_t wasted = capacity - length;
+
+ return capacity < reuse_threshold || wasted < capacity / 2;
+ }
+
+ // Assigns source to the string slot dest, reusing the existing storage when strcpy_insitu_allow permits.
+ // The header_mask bit inside header records whether dest was obtained from the allocator; it is updated here.
+ // Returns false only when a new buffer is needed and allocation fails (dest and header are then left untouched).
+ PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
+ {
+ assert(header);
+
+ const size_t source_length = strlength(source);
+
+ // case 1: non-empty source that fits into the current buffer - overwrite in place (terminator included)
+ if (source_length != 0 && dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
+ {
+ memcpy(dest, source, (source_length + 1) * sizeof(char_t));
+
+ return true;
+ }
+
+ // both remaining cases talk to the allocator of the page encoded in the header
+ xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
+
+ // case 2: empty source - an empty string is represented by a null pointer, so just release the old storage
+ if (source_length == 0)
+ {
+ if (header & header_mask) alloc->deallocate_string(dest);
+
+ dest = 0;
+ header &= ~header_mask;
+
+ return true;
+ }
+
+ // case 3: a fresh buffer is required
+ char_t* buf = alloc->allocate_string(source_length + 1);
+ if (!buf) return false;
+
+ memcpy(buf, source, (source_length + 1) * sizeof(char_t));
+
+ // release the old buffer only now: source may overlap it, and the allocation above may have failed
+ if (header & header_mask) alloc->deallocate_string(dest);
+
+ dest = buf;
+ header |= header_mask;
+
+ return true;
+ }
+
+ // Tracks a run of dead characters ("gap") inside a buffer that is being rewritten in place.
+ // end points just past the gap and size is its accumulated length, so the gap occupies [end - size, end).
+ struct gap
+ {
+ char_t* end;
+ size_t size;
+
+ gap(): end(0), size(0)
+ {
+ }
+
+ // Registers count dead characters starting at s and advances s past them.
+ // Any earlier gap is closed first, so the two gaps end up adjacent and are tracked as one.
+ void push(char_t*& s, size_t count)
+ {
+ if (end) collapse(s);
+
+ // step over the new gap
+ s += count;
+
+ // the merged gap now ends here
+ end = s;
+ size += count;
+ }
+
+ // Closes the pending gap (if any) and returns the past-the-end pointer of the compacted data.
+ char_t* flush(char_t* s)
+ {
+ if (!end) return s;
+
+ collapse(s);
+
+ return s - size;
+ }
+
+ private:
+ // Slides the live data [end, s) down by size characters, onto the start of the gap.
+ void collapse(char_t* s)
+ {
+ assert(s >= end);
+ memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
+ }
+ };
+
+ // Decodes one character/entity reference in place.
+ // s points at the introducing '&' (the switch below inspects s[1]); g is the caller's gap tracker.
+ // Handled forms: numeric references (&#N; decimal, &#xH; hexadecimal) and the five predefined
+ // entities &amp; &apos; &gt; &lt; &quot;.
+ // On success the decoded character(s) are written at s, the unused remainder of the reference text
+ // is pushed onto g as a gap, and the returned pointer is just past the terminating ';'.
+ // If the text is not a recognized reference, nothing is written and the pointer where recognition
+ // stopped is returned, so the caller continues scanning from there.
+ PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
+ {
+ char_t* stre = s + 1;
+
+ switch (*stre)
+ {
+ case '#': // &#...
+ {
+ // accumulated code point; unsigned arithmetic, so oversized input wraps rather than overflowing
+ unsigned int ucsc = 0;
+
+ if (stre[1] == 'x') // &#x... (hex code)
+ {
+ stre += 2;
+
+ char_t ch = *stre;
+
+ // "&#x;" carries no digits: not a reference
+ if (ch == ';') return stre;
+
+ for (;;)
+ {
+ if (static_cast<unsigned int>(ch - '0') <= 9)
+ ucsc = 16 * ucsc + (ch - '0');
+ else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5) // (ch | ' ') folds 'A'..'F' onto 'a'..'f'
+ ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
+ else if (ch == ';')
+ break;
+ else // cancel
+ return stre;
+
+ ch = *++stre;
+ }
+
+ // step over ';'
+ ++stre;
+ }
+ else // &#... (dec code)
+ {
+ char_t ch = *++stre;
+
+ // "&#;" carries no digits: not a reference
+ if (ch == ';') return stre;
+
+ for (;;)
+ {
+ if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9)
+ ucsc = 10 * ucsc + (ch - '0');
+ else if (ch == ';')
+ break;
+ else // cancel
+ return stre;
+
+ ch = *++stre;
+ }
+
+ // step over ';'
+ ++stre;
+ }
+
+ // encode the code point over the start of the reference; s is set to the writer's return value
+ // (presumably the position after the written units - the gap computation below relies on that)
+ #ifdef PUGIXML_WCHAR_MODE
+ s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
+ #else
+ s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
+ #endif
+
+ // whatever is left of the reference text becomes a gap
+ g.push(s, stre - s);
+ return stre;
+ }
+
+ case 'a': // &a
+ {
+ ++stre;
+
+ if (*stre == 'm') // &am
+ {
+ if (*++stre == 'p' && *++stre == ';') // &amp;
+ {
+ *s++ = '&';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ }
+ else if (*stre == 'p') // &ap
+ {
+ if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
+ {
+ *s++ = '\'';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ }
+ break;
+ }
+
+ case 'g': // &g
+ {
+ if (*++stre == 't' && *++stre == ';') // &gt;
+ {
+ *s++ = '>';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ break;
+ }
+
+ case 'l': // &l
+ {
+ if (*++stre == 't' && *++stre == ';') // &lt;
+ {
+ *s++ = '<';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ break;
+ }
+
+ case 'q': // &q
+ {
+ if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
+ {
+ *s++ = '"';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ // unrecognized reference: leave the text as it is and resume after the part already examined
+ return stre;
+ }
+
+ // Parser utilities
+ // These macros are not self-contained: they refer to names that must exist at the point of use
+ // (s = current position, endch = saved last character of the buffer, optmsk = parse options,
+ // cursor = current node, alloc = node allocator, ch = scratch character,
+ // error_offset / error_status = parser error state).
+
+ // True if c equals e, or if c is the null terminator and the saved last buffer character endch equals e
+ #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
+ // Advance s past characters classified as ct_space
+ #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
+ // Non-zero if any of the option bits OPT is set in optmsk
+ #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
+ // Append a new node of TYPE under cursor and make it the cursor; a null result is reported as status_out_of_memory
+ #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
+ // Move cursor back to its parent
+ #define PUGI__POPNODE() { cursor = cursor->parent; }
+ // Advance s until condition X holds or the null terminator is reached
+ #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
+ // Advance s while condition X holds
+ #define PUGI__SCANWHILE(X) { while (X) ++s; }
+ // Same as PUGI__SCANWHILE, unrolled four characters per iteration; X must test the local variable ss, which holds the character under examination
+ #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
+ // Save the current character in ch, overwrite it with a null terminator and step past it
+ #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
+ // Record error position m and status err, then return a null char_t* from the enclosing function (no C++ exception is thrown)
+ #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
+ // Report err at m if s is at the null terminator
+ #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
+
+ // Processes a comment body in place: converts "\r\n" and lone "\r" to "\n", then zero-terminates
+ // the body where the closing "-->" begins.
+ // endch is the saved last character of the buffer (consumed by PUGI__ENDSWITH).
+ // Returns the position after the closing sequence, or 0 if the null terminator is reached first.
+ PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
+ {
+ gap g;
+
+ while (true)
+ {
+ // fast-forward to the next character flagged ct_parse_comment
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
+
+ if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
+ {
+ *s++ = '\n'; // replace first one with 0x0a
+
+ // drop the 0x0a of a 0x0d 0x0a pair by turning it into a gap
+ if (*s == '\n') g.push(s, 1);
+ }
+ else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
+ {
+ *g.flush(s) = 0;
+
+ // skip "-->", or only "--" when the '>' was the buffer's last character (now a null terminator)
+ return s + (s[2] == '>' ? 3 : 2);
+ }
+ else if (*s == 0)
+ {
+ // end of buffer inside the comment
+ return 0;
+ }
+ else ++s;
+ }
+ }
+
+ // Processes a CDATA body in place: converts "\r\n" and lone "\r" to "\n", then zero-terminates
+ // the body where the closing "]]>" begins.
+ // endch is the saved last character of the buffer (consumed by PUGI__ENDSWITH).
+ // Returns the position of the second ']' of the closing sequence (the caller steps over the rest),
+ // or 0 if the null terminator is reached first.
+ PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
+ {
+ gap g;
+
+ while (true)
+ {
+ // fast-forward to the next character flagged ct_parse_cdata
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
+
+ if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
+ {
+ *s++ = '\n'; // replace first one with 0x0a
+
+ // drop the 0x0a of a 0x0d 0x0a pair by turning it into a gap
+ if (*s == '\n') g.push(s, 1);
+ }
+ else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
+ {
+ *g.flush(s) = 0;
+
+ // only the first ']' is stepped over here
+ return s + 1;
+ }
+ else if (*s == 0)
+ {
+ // end of buffer inside the CDATA section
+ return 0;
+ }
+ else ++s;
+ }
+ }
+
+ // Signature shared by all PCDATA converters: takes the start of the text, returns where parsing resumes
+ typedef char_t* (*strconv_pcdata_t)(char_t*);
+
+ // PCDATA converter parameterized at compile time by three options (each a type exposing a ::value constant):
+ // opt_trim - strip trailing ct_space characters from the resulting text
+ // opt_eol - convert "\r\n" and lone "\r" to "\n"
+ // opt_escape - decode '&' references through strconv_escape
+ template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
+ {
+ // Converts the text starting at s in place and zero-terminates it.
+ // Returns the position after the terminating '<', or the position of the null terminator
+ // when the buffer ends first.
+ static char_t* parse(char_t* s)
+ {
+ gap g;
+
+ // start of the text, lower bound for trailing-whitespace trimming
+ char_t* begin = s;
+
+ while (true)
+ {
+ // fast-forward to the next character flagged ct_parse_pcdata
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
+
+ if (*s == '<') // PCDATA ends here
+ {
+ char_t* end = g.flush(s);
+
+ if (opt_trim::value)
+ while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
+ --end;
+
+ *end = 0;
+
+ return s + 1;
+ }
+ else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
+ {
+ *s++ = '\n'; // replace first one with 0x0a
+
+ // drop the 0x0a of a 0x0d 0x0a pair by turning it into a gap
+ if (*s == '\n') g.push(s, 1);
+ }
+ else if (opt_escape::value && *s == '&')
+ {
+ s = strconv_escape(s, g);
+ }
+ else if (*s == 0)
+ {
+ // end of buffer: finish the text exactly as for '<', but stay on the terminator
+ char_t* end = g.flush(s);
+
+ if (opt_trim::value)
+ while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
+ --end;
+
+ *end = 0;
+
+ return s;
+ }
+ else ++s;
+ }
+ }
+ };
+
+ // Picks the strconv_pcdata_impl instantiation matching the parse_escapes / parse_eol / parse_trim_pcdata
+ // bits of optmask.
+ PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
+ {
+ PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
+
+ // indexed by a 3-bit value: bit 0 = escapes, bit 1 = eol, bit 2 = trim
+ // (template arguments are ordered trim, eol, escape)
+ static const strconv_pcdata_t converters[8] =
+ {
+ strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse,
+ strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse,
+ strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse,
+ strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse,
+ strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse,
+ strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse,
+ strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse,
+ strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse
+ };
+
+ // parse_escapes and parse_eol land in bits 0-1, parse_trim_pcdata in bit 2
+ const unsigned int escapes_eol_bits = (optmask >> 4) & 3;
+ const unsigned int trim_bit = (optmask >> 9) & 4;
+
+ // the combined index is always within 0..7
+ return converters[escapes_eol_bits | trim_bit];
+ }
+
+ // Signature shared by all attribute value converters: (start of value, closing quote character)
+ typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
+
+ // Attribute value converters. opt_escape (a type exposing ::value) enables decoding of '&' references.
+ // Every variant rewrites the value in place, zero-terminates it, and returns the position after the
+ // closing quote, or 0 if the null terminator is reached before the closing quote.
+ template <typename opt_escape> struct strconv_attribute_impl
+ {
+ // Whitespace normalization: removes leading and trailing whitespace and replaces every run of
+ // whitespace characters with a single space.
+ static char_t* parse_wnorm(char_t* s, char_t end_quote)
+ {
+ gap g;
+
+ // trim leading whitespaces
+ if (PUGI__IS_CHARTYPE(*s, ct_space))
+ {
+ char_t* str = s;
+
+ do ++str;
+ while (PUGI__IS_CHARTYPE(*str, ct_space));
+
+ g.push(s, str - s);
+ }
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
+
+ if (*s == end_quote)
+ {
+ char_t* str = g.flush(s);
+
+ // write the terminator, then keep walking backwards zeroing trailing whitespace
+ do *str-- = 0;
+ while (PUGI__IS_CHARTYPE(*str, ct_space));
+
+ return s + 1;
+ }
+ else if (PUGI__IS_CHARTYPE(*s, ct_space))
+ {
+ // the first whitespace character of a run becomes a space...
+ *s++ = ' ';
+
+ // ...and the rest of the run is dropped
+ if (PUGI__IS_CHARTYPE(*s, ct_space))
+ {
+ char_t* str = s + 1;
+ while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
+
+ g.push(s, str - s);
+ }
+ }
+ else if (opt_escape::value && *s == '&')
+ {
+ s = strconv_escape(s, g);
+ }
+ else if (!*s)
+ {
+ return 0;
+ }
+ else ++s;
+ }
+ }
+
+ // Whitespace conversion: every whitespace character becomes a space; a "\r\n" pair becomes one space.
+ static char_t* parse_wconv(char_t* s, char_t end_quote)
+ {
+ gap g;
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
+
+ if (*s == end_quote)
+ {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ }
+ else if (PUGI__IS_CHARTYPE(*s, ct_space))
+ {
+ if (*s == '\r')
+ {
+ *s++ = ' ';
+
+ // drop the 0x0a of a 0x0d 0x0a pair
+ if (*s == '\n') g.push(s, 1);
+ }
+ else *s++ = ' ';
+ }
+ else if (opt_escape::value && *s == '&')
+ {
+ s = strconv_escape(s, g);
+ }
+ else if (!*s)
+ {
+ return 0;
+ }
+ else ++s;
+ }
+ }
+
+ // End-of-line normalization only: "\r\n" and lone "\r" become "\n".
+ static char_t* parse_eol(char_t* s, char_t end_quote)
+ {
+ gap g;
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
+
+ if (*s == end_quote)
+ {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ }
+ else if (*s == '\r')
+ {
+ *s++ = '\n';
+
+ // drop the 0x0a of a 0x0d 0x0a pair
+ if (*s == '\n') g.push(s, 1);
+ }
+ else if (opt_escape::value && *s == '&')
+ {
+ s = strconv_escape(s, g);
+ }
+ else if (!*s)
+ {
+ return 0;
+ }
+ else ++s;
+ }
+ }
+
+ // No whitespace processing: only reference decoding (when enabled) and termination.
+ static char_t* parse_simple(char_t* s, char_t end_quote)
+ {
+ gap g;
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
+
+ if (*s == end_quote)
+ {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ }
+ else if (opt_escape::value && *s == '&')
+ {
+ s = strconv_escape(s, g);
+ }
+ else if (!*s)
+ {
+ return 0;
+ }
+ else ++s;
+ }
+ }
+ };
+
+ // Picks the attribute value converter matching the parse_escapes / parse_eol / parse_wconv_attribute /
+ // parse_wnorm_attribute bits of optmask. wnorm takes precedence over wconv, which takes precedence over eol.
+ PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
+ {
+ PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
+
+ // indexed by a 4-bit value: bit 0 = escapes, bit 1 = eol, bit 2 = wconv, bit 3 = wnorm
+ static const strconv_attribute_t converters[16] =
+ {
+ // neither wconv nor wnorm
+ strconv_attribute_impl<opt_false>::parse_simple,
+ strconv_attribute_impl<opt_true>::parse_simple,
+ strconv_attribute_impl<opt_false>::parse_eol,
+ strconv_attribute_impl<opt_true>::parse_eol,
+ // wconv (the eol bit makes no difference)
+ strconv_attribute_impl<opt_false>::parse_wconv,
+ strconv_attribute_impl<opt_true>::parse_wconv,
+ strconv_attribute_impl<opt_false>::parse_wconv,
+ strconv_attribute_impl<opt_true>::parse_wconv,
+ // wnorm (the eol and wconv bits make no difference)
+ strconv_attribute_impl<opt_false>::parse_wnorm,
+ strconv_attribute_impl<opt_true>::parse_wnorm,
+ strconv_attribute_impl<opt_false>::parse_wnorm,
+ strconv_attribute_impl<opt_true>::parse_wnorm,
+ strconv_attribute_impl<opt_false>::parse_wnorm,
+ strconv_attribute_impl<opt_true>::parse_wnorm,
+ strconv_attribute_impl<opt_false>::parse_wnorm,
+ strconv_attribute_impl<opt_true>::parse_wnorm
+ };
+
+ // the four option flags occupy bits 4..7 of the mask, so the index is always within 0..15
+ const unsigned int flag_bits = (optmask >> 4) & 15;
+
+ return converters[flag_bits];
+ }
+
+ // Builds an xml_parse_result carrying the given status and offset (offset defaults to 0);
+ // all other members keep their default-constructed values.
+ inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
+ {
+ xml_parse_result outcome;
+
+ outcome.offset = offset;
+ outcome.status = status;
+
+ return outcome;
+ }
+
+ // In-place XML parser. The parse_* member functions below walk a zero-terminated, mutable buffer,
+ // create nodes/attributes through alloc, and report failures by setting error_offset/error_status
+ // and returning a null pointer (see PUGI__THROW_ERROR).
+ struct xml_parser
+ {
+ // allocator used for nodes and attributes created while parsing (a copy of the one passed to the constructor)
+ xml_allocator alloc;
+ // position in the buffer where the last error was reported; 0 when no error occurred
+ char_t* error_offset;
+ // status of the last reported error; status_ok when no error occurred
+ xml_parse_status error_status;
+
+ // Starts with a copy of alloc_ and a clean error state.
+ xml_parser(const xml_allocator& alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
+ {
+ }
+
+ // DOCTYPE consists of nested sections of the following possible types:
+ // <!-- ... -->, <? ... ?>, "...", '...'
+ // <![...]]>
+ // <!...>
+ // First group can not contain nested groups
+ // Second group can contain nested groups of the same type
+ // Third group can contain all other groups
+
+ // Skips one section of the first group (quoted string, processing instruction or comment) starting at s.
+ // Returns the position just past the section. If s does not start such a section, or the section is
+ // not terminated before the end of the buffer, reports status_bad_doctype and returns a null pointer.
+ char_t* parse_doctype_primitive(char_t* s)
+ {
+ if (*s == '"' || *s == '\'')
+ {
+ // quoted string
+ char_t ch = *s++;
+ PUGI__SCANFOR(*s == ch);
+ if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ // step over the closing quote
+ s++;
+ }
+ else if (s[0] == '<' && s[1] == '?')
+ {
+ // <? ... ?>
+ s += 2;
+ PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
+ if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ // step over "?>"
+ s += 2;
+ }
+ else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
+ {
+ // <!-- ... -->
+ s += 4;
+ PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
+ if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ // step over "-->" and nothing more: the terminator is three characters long, and skipping a fourth
+ // would swallow the character that follows the comment (possibly the buffer's null terminator)
+ s += 3;
+ }
+ else PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ return s;
+ }
+
+ // Skips a "<![ ... ]]>" section, including any "<![ ... ]]>" sections nested inside it.
+ // s must point at the opening "<![". Returns the position after the matching "]]>";
+ // reports status_bad_doctype and returns a null pointer if the buffer ends first.
+ char_t* parse_doctype_ignore(char_t* s)
+ {
+ assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
+ s += 3;
+
+ // number of nested sections that are currently open
+ size_t nesting = 0;
+
+ for (; *s; )
+ {
+ const bool opens_section = s[0] == '<' && s[1] == '!' && s[2] == '[';
+ const bool closes_section = !opens_section && s[0] == ']' && s[1] == ']' && s[2] == '>';
+
+ if (!opens_section && !closes_section)
+ {
+ // ordinary character
+ ++s;
+ continue;
+ }
+
+ // both delimiters are three characters long
+ s += 3;
+
+ if (opens_section)
+ {
+ ++nesting;
+ }
+ else if (nesting == 0)
+ {
+ // this "]]>" closes the outermost section
+ return s;
+ }
+ else
+ {
+ --nesting;
+ }
+ }
+
+ // ran out of input with the section still open
+ PUGI__THROW_ERROR(status_bad_doctype, s);
+ }
+
+ // Skips a "<!...>" group of a DOCTYPE, including nested "<!...>" groups, "<![...]]>" sections and
+ // primitive sections (strings, processing instructions, comments).
+ // s must point at the opening "<!" (the '<' may have been overwritten by a null terminator).
+ // endch is the saved last character of the buffer.
+ // Returns the position of the '>' that closes the outermost group (it is not stepped over), or the
+ // position of the null terminator when the buffer ends with all nested groups closed and endch is '>'.
+ // On failure reports status_bad_doctype and returns a null pointer.
+ char_t* parse_doctype_group(char_t* s, char_t endch)
+ {
+ // number of nested "<!...>" groups that are currently open
+ size_t depth = 0;
+
+ assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
+ s += 2;
+
+ while (*s)
+ {
+ if (s[0] == '<' && s[1] == '!' && s[2] != '-')
+ {
+ if (s[2] == '[')
+ {
+ // ignore
+ s = parse_doctype_ignore(s);
+ if (!s) return s;
+ }
+ else
+ {
+ // some control group
+ s += 2;
+ depth++;
+ }
+ }
+ else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
+ {
+ // unknown tag (forbidden), or some primitive group
+ s = parse_doctype_primitive(s);
+ if (!s) return s;
+ }
+ else if (*s == '>')
+ {
+ // closes either the outermost group (done) or a nested one
+ if (depth == 0)
+ return s;
+
+ depth--;
+ s++;
+ }
+ else s++;
+ }
+
+ // end of buffer: acceptable only if nothing is left open and the overwritten last character was the closing '>'
+ if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ return s;
+ }
+
+ // Parses markup that starts with "<!": comments, CDATA sections and DOCTYPE declarations.
+ // s points at the '!'. cursor is the node that receives new children; it is taken by value, so
+ // nodes pushed here do not change the caller's cursor. optmsk holds the parse options and endch
+ // the saved last character of the buffer.
+ // Returns the position after the construct, or a null pointer after reporting an error.
+ char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
+ {
+ // parse node contents, starting with exclamation mark
+ ++s;
+
+ if (*s == '-') // '<!-...'
+ {
+ ++s;
+
+ if (*s == '-') // '<!--...'
+ {
+ ++s;
+
+ if (PUGI__OPTSET(parse_comments))
+ {
+ PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
+ cursor->value = s; // Save the offset.
+ }
+
+ // end-of-line conversion is only worth doing when the comment text is kept
+ if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
+ {
+ s = strconv_comment(s, endch);
+
+ if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
+ }
+ else
+ {
+ // Scan for terminating '-->'.
+ PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
+ PUGI__CHECK_ERROR(status_bad_comment, s);
+
+ if (PUGI__OPTSET(parse_comments))
+ *s = 0; // Zero-terminate this segment at the first terminating '-'.
+
+ s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
+ }
+ }
+ else PUGI__THROW_ERROR(status_bad_comment, s);
+ }
+ else if (*s == '[')
+ {
+ // '<![CDATA[...'
+ if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
+ {
+ ++s;
+
+ if (PUGI__OPTSET(parse_cdata))
+ {
+ PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
+ cursor->value = s; // Save the offset.
+
+ if (PUGI__OPTSET(parse_eol))
+ {
+ s = strconv_cdata(s, endch);
+
+ if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
+ }
+ else
+ {
+ // Scan for terminating ']]>'.
+ PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
+ PUGI__CHECK_ERROR(status_bad_cdata, s);
+
+ *s++ = 0; // Zero-terminate this segment.
+ }
+ }
+ else // Flagged for discard, but we still have to scan for the terminator.
+ {
+ // Scan for terminating ']]>'.
+ PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
+ PUGI__CHECK_ERROR(status_bad_cdata, s);
+
+ ++s;
+ }
+
+ // every path above leaves s on the second ']' of the terminator
+ s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
+ }
+ else PUGI__THROW_ERROR(status_bad_cdata, s);
+ }
+ else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
+ {
+ // rewind to the '<' of "<!DOCTYPE"
+ s -= 2;
+
+ // DOCTYPE is only accepted when the current node has no parent
+ if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ // first character after the nine characters of "<!DOCTYPE"
+ char_t* mark = s + 9;
+
+ s = parse_doctype_group(s, endch);
+ if (!s) return s;
+
+ assert((*s == 0 && endch == '>') || *s == '>');
+ // terminate the doctype text at the closing '>' and step over it
+ if (*s) *s++ = 0;
+
+ if (PUGI__OPTSET(parse_doctype))
+ {
+ // the stored value starts at the first non-whitespace character after "<!DOCTYPE"
+ while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
+
+ PUGI__PUSHNODE(node_doctype);
+
+ cursor->value = mark;
+ }
+ }
+ // the buffer ended right after "<!" and the overwritten last character shows which construct was started
+ else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
+ else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
+ else PUGI__THROW_ERROR(status_unrecognized_tag, s);
+
+ return s;
+ }
+
+ // Parses markup that starts with "<?": processing instructions and the XML declaration.
+ // s points at the '?'. ref_cursor is the current node; it is updated on return. optmsk holds the
+ // parse options and endch the saved last character of the buffer.
+ // For a kept declaration that has content, the function returns with the cursor on the new
+ // node_declaration and s at the start of that content, so that the caller parses it as attributes.
+ // Returns the position where parsing resumes, or a null pointer after reporting an error.
+ char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
+ {
+ // load into registers
+ xml_node_struct* cursor = ref_cursor;
+ char_t ch = 0;
+
+ // parse node contents, starting with question mark
+ ++s;
+
+ // read PI target
+ char_t* target = s;
+
+ if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
+
+ PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
+ PUGI__CHECK_ERROR(status_bad_pi, s);
+
+ // determine node type; stricmp / strcasecmp is not portable
+ // (x | ' ') lower-cases ASCII letters; the target must be exactly three characters long
+ bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
+
+ // keep the node only if the corresponding option is enabled
+ if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
+ {
+ if (declaration)
+ {
+ // disallow non top-level declarations
+ if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
+
+ PUGI__PUSHNODE(node_declaration);
+ }
+ else
+ {
+ PUGI__PUSHNODE(node_pi);
+ }
+
+ cursor->name = target;
+
+ // terminate the target name; ch receives the character that followed it
+ PUGI__ENDSEG();
+
+ // parse value/attributes
+ if (ch == '?')
+ {
+ // empty node
+ if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
+ s += (*s == '>');
+
+ PUGI__POPNODE();
+ }
+ else if (PUGI__IS_CHARTYPE(ch, ct_space))
+ {
+ PUGI__SKIPWS();
+
+ // scan for tag end
+ char_t* value = s;
+
+ PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
+ PUGI__CHECK_ERROR(status_bad_pi, s);
+
+ if (declaration)
+ {
+ // replace ending ? with / so that 'element' terminates properly
+ *s = '/';
+
+ // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
+ s = value;
+ }
+ else
+ {
+ // store value and step over >
+ cursor->value = value;
+ PUGI__POPNODE();
+
+ // terminate the value at the '?'
+ PUGI__ENDSEG();
+
+ s += (*s == '>');
+ }
+ }
+ else PUGI__THROW_ERROR(status_bad_pi, s);
+ }
+ else
+ {
+ // node is discarded: just skip it
+ // scan for tag end
+ PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
+ PUGI__CHECK_ERROR(status_bad_pi, s);
+
+ s += (s[1] == '>' ? 2 : 1);
+ }
+
+ // store from registers
+ ref_cursor = cursor;
+
+ return s;
+ }
+
+ // Main parsing loop: walks the zero-terminated buffer starting at s and builds the node tree under root.
+ // optmsk holds the parse options; endch is the saved last character of the buffer (the caller replaced
+ // it with a null terminator), which the end-of-buffer checks below take into account.
+ // Returns the final position on success; on failure returns a null pointer with
+ // error_offset/error_status set.
+ // Two labels are jumped to from elsewhere in the function: LOC_TAG (s is just after a '<') and
+ // LOC_ATTRIBUTES (parse attributes of the current node; also used for the XML declaration).
+ char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
+ {
+ // converters are selected once, according to the options
+ strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
+ strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
+
+ char_t ch = 0;
+ xml_node_struct* cursor = root;
+ char_t* mark = s;
+
+ while (*s != 0)
+ {
+ if (*s == '<')
+ {
+ ++s;
+
+ LOC_TAG:
+ if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
+ {
+ PUGI__PUSHNODE(node_element); // Append a new node to the tree.
+
+ cursor->name = s;
+
+ PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
+ PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
+
+ if (ch == '>')
+ {
+ // end of tag
+ }
+ else if (PUGI__IS_CHARTYPE(ch, ct_space))
+ {
+ LOC_ATTRIBUTES:
+ while (true)
+ {
+ PUGI__SKIPWS(); // Eat any whitespace.
+
+ if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
+ {
+ xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute.
+ if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
+
+ a->name = s; // Save the offset.
+
+ PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
+ PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
+
+ if (PUGI__IS_CHARTYPE(ch, ct_space))
+ {
+ PUGI__SKIPWS(); // Eat any whitespace.
+
+ ch = *s;
+ ++s;
+ }
+
+ if (ch == '=') // '<... #=...'
+ {
+ PUGI__SKIPWS(); // Eat any whitespace.
+
+ if (*s == '"' || *s == '\'') // '<... #="...'
+ {
+ ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
+ ++s; // Step over the quote.
+ a->value = s; // Save the offset.
+
+ s = strconv_attribute(s, ch);
+
+ if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
+
+ // After this line the loop continues from the start;
+ // Whitespaces, / and > are ok, symbols and EOF are wrong,
+ // everything else will be detected
+ if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
+ }
+ else PUGI__THROW_ERROR(status_bad_attribute, s);
+ }
+ else PUGI__THROW_ERROR(status_bad_attribute, s);
+ }
+ else if (*s == '/')
+ {
+ ++s;
+
+ if (*s == '>')
+ {
+ // '/>' closes the node immediately
+ PUGI__POPNODE();
+ s++;
+ break;
+ }
+ else if (*s == 0 && endch == '>')
+ {
+ // same, with the '>' being the overwritten last character of the buffer
+ PUGI__POPNODE();
+ break;
+ }
+ else PUGI__THROW_ERROR(status_bad_start_element, s);
+ }
+ else if (*s == '>')
+ {
+ ++s;
+
+ break;
+ }
+ else if (*s == 0 && endch == '>')
+ {
+ break;
+ }
+ else PUGI__THROW_ERROR(status_bad_start_element, s);
+ }
+
+ // !!!
+ }
+ else if (ch == '/') // '<#.../'
+ {
+ if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
+
+ PUGI__POPNODE(); // Pop.
+
+ s += (*s == '>');
+ }
+ else if (ch == 0)
+ {
+ // we stepped over null terminator, backtrack & handle closing tag
+ --s;
+
+ if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
+ }
+ else PUGI__THROW_ERROR(status_bad_start_element, s);
+ }
+ else if (*s == '/')
+ {
+ // closing tag: its name has to match the name of the current node
+ ++s;
+
+ char_t* name = cursor->name;
+ if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+
+ while (PUGI__IS_CHARTYPE(*s, ct_symbol))
+ {
+ if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+ }
+
+ // the closing name ended before the node name did
+ if (*name)
+ {
+ if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
+ else PUGI__THROW_ERROR(status_end_element_mismatch, s);
+ }
+
+ PUGI__POPNODE(); // Pop.
+
+ PUGI__SKIPWS();
+
+ if (*s == 0)
+ {
+ if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
+ }
+ else
+ {
+ if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
+ ++s;
+ }
+ }
+ else if (*s == '?') // '<?...'
+ {
+ s = parse_question(s, cursor, optmsk, endch);
+ if (!s) return s;
+
+ assert(cursor);
+ // cursor left on a declaration node means its content still has to be parsed as attributes
+ if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
+ }
+ else if (*s == '!') // '<!...'
+ {
+ s = parse_exclamation(s, cursor, optmsk, endch);
+ if (!s) return s;
+ }
+ else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
+ else PUGI__THROW_ERROR(status_unrecognized_tag, s);
+ }
+ else
+ {
+ mark = s; // Save this offset while searching for a terminator.
+
+ PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
+
+ if (*s == '<' || !*s)
+ {
+ // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
+ assert(mark != s);
+
+ // whitespace-only text: dropped unless one of the whitespace-preserving options asks for it
+ if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
+ {
+ continue;
+ }
+ else if (PUGI__OPTSET(parse_ws_pcdata_single))
+ {
+ // kept only when followed by a closing tag and the current node has no children yet
+ if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
+ }
+ }
+
+ // without trimming, the text starts at the saved position (leading whitespace included)
+ if (!PUGI__OPTSET(parse_trim_pcdata))
+ s = mark;
+
+ if (cursor->parent || PUGI__OPTSET(parse_fragment))
+ {
+ PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
+ cursor->value = s; // Save the offset.
+
+ s = strconv_pcdata(s);
+
+ PUGI__POPNODE(); // Pop since this is a standalone.
+
+ if (!*s) break;
+ }
+ else
+ {
+ // text at the top level of a non-fragment document is skipped without creating a node
+ PUGI__SCANFOR(*s == '<'); // '...<'
+ if (!*s) break;
+
+ ++s;
+ }
+
+ // We're after '<'
+ goto LOC_TAG;
+ }
+ }
+
+ // check that last tag is closed
+ if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+
+ return s;
+ }
+
+ // Returns s advanced past a leading byte order mark, or s itself when the buffer does not start with one.
+ // In wchar mode the mark is the single unit 0xfeff; otherwise it is the three-byte sequence ef bb bf.
+ static char_t* parse_skip_bom(char_t* s)
+ {
+ #ifdef PUGIXML_WCHAR_MODE
+ unsigned int bom = 0xfeff;
+ const bool starts_with_bom = (s[0] == static_cast<wchar_t>(bom));
+ const size_t bom_length = 1;
+ #else
+ const bool starts_with_bom = (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf');
+ const size_t bom_length = 3;
+ #endif
+
+ return starts_with_bom ? s + bom_length : s;
+ }
+
+ // Reports whether node or any node reachable from it through next_sibling is an element node.
+ // A null node yields false.
+ static bool has_element_node_siblings(xml_node_struct* node)
+ {
+ for (xml_node_struct* it = node; it; it = it->next_sibling)
+ {
+ if (PUGI__NODETYPE(it) == node_element) return true;
+ }
+
+ return false;
+ }
+
+ // Parses length characters of buffer into nodes under root and returns the outcome.
+ // The buffer is modified in place; its last character is overwritten with a null terminator and
+ // passed on separately so that the parsing routines can still account for it.
+ // xmldoc is also used as the allocator; its state is updated after parsing.
+ static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
+ {
+ const bool fragment_mode = PUGI__OPTSET(parse_fragment) != 0;
+
+ // an empty buffer is a valid fragment but not a valid document
+ if (length == 0)
+ {
+ return make_parse_result(fragment_mode ? status_ok : status_no_document_element);
+ }
+
+ // the document object doubles as the allocator
+ xml_allocator& alloc_ = *static_cast<xml_allocator*>(xmldoc);
+
+ // remember the last pre-existing child of root, to tell old children from newly parsed ones
+ xml_node_struct* last_root_child = 0;
+ if (root->first_child) last_root_child = root->first_child->prev_sibling_c;
+
+ // zero-terminate the buffer, keeping the overwritten character
+ const size_t last_index = length - 1;
+ const char_t endch = buffer[last_index];
+ buffer[last_index] = 0;
+
+ // run the parser (skipping a leading BOM so it does not end up in the output)
+ xml_parser parser(alloc_);
+ parser.parse_tree(parse_skip_bom(buffer), root, optmsk, endch);
+
+ // hand the allocator state back to the document
+ alloc_ = parser.alloc;
+
+ // translate the error position into an offset from the start of the buffer
+ ptrdiff_t error_pos = 0;
+ if (parser.error_offset) error_pos = parser.error_offset - buffer;
+
+ xml_parse_result result = make_parse_result(parser.error_status, error_pos);
+ assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
+
+ if (result)
+ {
+ // the removed last character hides exactly one possible error: a stray '<' at the very end
+ if (endch == '<')
+ return make_parse_result(status_unrecognized_tag, last_index);
+
+ // a document (as opposed to a fragment) needs at least one newly parsed element under root
+ xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling : root->first_child;
+
+ if (!fragment_mode && !has_element_node_siblings(first_root_child_parsed))
+ return make_parse_result(status_no_document_element, last_index);
+ }
+ else if (result.offset > 0 && static_cast<size_t>(result.offset) == last_index && endch == 0)
+ {
+ // the error points at a null terminator that was part of the source buffer: move back one character
+ result.offset--;
+ }
+
+ return result;
+ }
+ };
+
+ // Output facilities
+ // Encoding of char_t data as held in memory: utf8 in the default build, the wchar_t encoding in wchar mode.
+ PUGI__FN xml_encoding get_write_native_encoding()
+ {
+ #ifndef PUGIXML_WCHAR_MODE
+ return encoding_utf8;
+ #else
+ return get_wchar_encoding();
+ #endif
+ }
+
+ // Resolves a requested output encoding to a concrete one: generic values (auto, wchar, utf16, utf32)
+ // are replaced, every other value is returned unchanged.
+ PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
+ {
+ // no explicit request: write utf8
+ if (encoding == encoding_auto) return encoding_utf8;
+
+ // wchar maps to whichever utf encoding wchar_t uses
+ if (encoding == encoding_wchar) return get_wchar_encoding();
+
+ // utf16 / utf32 without an explicit byte order use the byte order of the host
+ if (encoding == encoding_utf16)
+ {
+ return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+ }
+
+ if (encoding == encoding_utf32)
+ {
+ return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+ }
+
+ // already concrete
+ return encoding;
+ }
+
+#ifdef PUGIXML_WCHAR_MODE
+ // Returns how many leading units of data can be converted without splitting a surrogate pair:
+ // when wchar_t is two bytes wide and the last unit lies in 0xD800..0xDBFF, that unit is left out.
+ PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
+ {
+ if (length < 1) return 0;
+
+ // surrogates only exist when wchar_t is 16 bits wide
+ if (sizeof(wchar_t) != 2) return length;
+
+ const unsigned int last_unit = static_cast<uint16_t>(data[length - 1]);
+
+ // one unsigned comparison covers the whole range 0xD800..0xDBFF
+ const bool ends_with_lead_surrogate = static_cast<unsigned int>(last_unit - 0xD800) < 0x400;
+
+ return ends_with_lead_surrogate ? length - 1 : length;
+ }
+
+ // Converts length wide characters from data into the requested output encoding (wchar mode build).
+ // The result is written through whichever of r_char / r_u8 / r_u16 / r_u32 matches the target unit type.
+ // Returns the size of the converted output in bytes, or 0 for an encoding that is not handled.
+ PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
+ {
+ // case 1: byte swapping alone is sufficient
+ if (need_endian_swap_utf(encoding, get_wchar_encoding()))
+ {
+ convert_wchar_endian_swap(r_char, data, length);
+
+ return length * sizeof(char_t);
+ }
+
+ // case 2: utf8 output
+ if (encoding == encoding_utf8)
+ {
+ uint8_t* const out_begin = r_u8;
+ uint8_t* const out_end = utf_decoder<utf8_writer>::decode_wchar_block(data, length, out_begin);
+
+ return static_cast<size_t>(out_end - out_begin);
+ }
+
+ // case 3: utf16 output - convert in host byte order, then swap if the other order was requested
+ if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+ {
+ uint16_t* const out_begin = r_u16;
+ uint16_t* const out_end = utf_decoder<utf16_writer>::decode_wchar_block(data, length, out_begin);
+ const size_t unit_count = static_cast<size_t>(out_end - out_begin);
+
+ const xml_encoding host_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+ if (encoding != host_utf16) convert_utf_endian_swap(out_begin, out_begin, unit_count);
+
+ return unit_count * sizeof(uint16_t);
+ }
+
+ // case 4: utf32 output - convert in host byte order, then swap if the other order was requested
+ if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+ {
+ uint32_t* const out_begin = r_u32;
+ uint32_t* const out_end = utf_decoder<utf32_writer>::decode_wchar_block(data, length, out_begin);
+ const size_t unit_count = static_cast<size_t>(out_end - out_begin);
+
+ const xml_encoding host_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+ if (encoding != host_utf32) convert_utf_endian_swap(out_begin, out_begin, unit_count);
+
+ return unit_count * sizeof(uint32_t);
+ }
+
+ // case 5: latin1 output
+ if (encoding == encoding_latin1)
+ {
+ uint8_t* const out_begin = r_u8;
+ uint8_t* const out_end = utf_decoder<latin1_writer>::decode_wchar_block(data, length, out_begin);
+
+ return static_cast<size_t>(out_end - out_begin);
+ }
+
+ assert(!"Invalid encoding");
+ return 0;
+ }
+#else
+ // Returns a prefix length of the UTF-8 data that does not end in the middle of a multi-byte
+ // sequence. Inputs shorter than 5 bytes yield 0. Only the last four bytes are inspected: the
+ // prefix ends just before the last byte that is not a continuation byte (10xxxxxx).
+ PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
+ {
+     if (length < 5) return 0;
+
+     size_t back = 1;
+
+     while (back <= 4)
+     {
+         uint8_t byte = static_cast<uint8_t>(data[length - back]);
+         bool continuation = (byte & 0xc0) == 0x80;
+
+         // a standalone or leading byte: cut right before it
+         if (!continuation) return length - back;
+
+         ++back;
+     }
+
+     // four continuation bytes in a row mean the tail is malformed anyway, so take the whole chunk
+     return length;
+ }
+
+ // Encodes `length` UTF-8 bytes starting at `data` into `encoding`, storing the result through the
+ // output pointer whose unit size matches the target. Returns the number of bytes produced;
+ // asserts and returns 0 when no branch handles the requested encoding.
+ PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
+ {
+     const uint8_t* input = reinterpret_cast<const uint8_t*>(data);
+
+     switch (encoding)
+     {
+     case encoding_utf16_be:
+     case encoding_utf16_le:
+     {
+         uint16_t* out = r_u16;
+
+         // produce native-endian utf16 first
+         uint16_t* stop = utf_decoder<utf16_writer>::decode_utf8_block(input, length, out);
+         size_t units = static_cast<size_t>(stop - out);
+
+         // then flip byte order if the requested flavour is not the native one
+         xml_encoding native = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+         if (encoding != native) convert_utf_endian_swap(out, out, units);
+
+         return units * sizeof(uint16_t);
+     }
+
+     case encoding_utf32_be:
+     case encoding_utf32_le:
+     {
+         uint32_t* out = r_u32;
+
+         // produce native-endian utf32 first
+         uint32_t* stop = utf_decoder<utf32_writer>::decode_utf8_block(input, length, out);
+         size_t units = static_cast<size_t>(stop - out);
+
+         // then flip byte order if the requested flavour is not the native one
+         xml_encoding native = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+         if (encoding != native) convert_utf_endian_swap(out, out, units);
+
+         return units * sizeof(uint32_t);
+     }
+
+     case encoding_latin1:
+     {
+         uint8_t* out = r_u8;
+         uint8_t* stop = utf_decoder<latin1_writer>::decode_utf8_block(input, length, out);
+
+         return static_cast<size_t>(stop - out);
+     }
+
+     default:
+         assert(!"Invalid encoding");
+         return 0;
+     }
+ }
+#endif
+
+ // Accumulates char_t output in a fixed-size member buffer and hands it to an xml_writer when the
+ // buffer fills up or the object is destroyed. If the target encoding differs from the native
+ // write encoding, data is converted through the `scratch` union before being written.
+ // The copy constructor and copy assignment are declared private without a body here.
+ class xml_buffered_writer
+ {
+ xml_buffered_writer(const xml_buffered_writer&);
+ xml_buffered_writer& operator=(const xml_buffered_writer&);
+
+ public:
+ // Binds to writer_ and resolves user_encoding through get_write_encoding; the buffer starts empty.
+ xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
+ {
+ // the fixed-arity write() overloads below store up to 6 characters right after a flush
+ PUGI__STATIC_ASSERT(bufcapacity >= 8);
+ }
+
+ // Flushes whatever is still buffered.
+ ~xml_buffered_writer()
+ {
+ flush();
+ }
+
+ // Writes out the buffered characters and empties the buffer.
+ // Always returns 0, which lets callers write `offset = flush()` to get the new buffer offset.
+ size_t flush()
+ {
+ flush(buffer, bufsize);
+ bufsize = 0;
+ return 0;
+ }
+
+ // Writes `size` characters from `data` to the underlying writer, converting them first when the
+ // target encoding is not the native one. Does not touch bufsize.
+ void flush(const char_t* data, size_t size)
+ {
+ if (size == 0) return;
+
+ // fast path, just write data
+ if (encoding == get_write_native_encoding())
+ writer.write(data, size * sizeof(char_t));
+ else
+ {
+ // convert chunk
+ size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
+ assert(result <= sizeof(scratch));
+
+ // write data
+ writer.write(scratch.data_u8, result);
+ }
+ }
+
+ // Flushes the buffer, then outputs `length` characters from `data`: input longer than the buffer
+ // is written (or converted chunk by chunk) directly, and the remaining tail is copied into the buffer.
+ void write_direct(const char_t* data, size_t length)
+ {
+ // flush the remaining buffer contents
+ flush();
+
+ // handle large chunks
+ if (length > bufcapacity)
+ {
+ if (encoding == get_write_native_encoding())
+ {
+ // fast path, can just write data chunk
+ writer.write(data, length * sizeof(char_t));
+ return;
+ }
+
+ // need to convert in suitable chunks
+ while (length > bufcapacity)
+ {
+ // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
+ // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
+ size_t chunk_size = get_valid_length(data, bufcapacity);
+ assert(chunk_size);
+
+ // convert chunk and write
+ flush(data, chunk_size);
+
+ // iterate
+ data += chunk_size;
+ length -= chunk_size;
+ }
+
+ // small tail is copied below
+ bufsize = 0;
+ }
+
+ // at this point length <= bufcapacity and the buffer is empty, so the tail fits
+ memcpy(buffer + bufsize, data, length * sizeof(char_t));
+ bufsize += length;
+ }
+
+ // Appends `length` characters to the buffer when they fit; otherwise defers to write_direct.
+ void write_buffer(const char_t* data, size_t length)
+ {
+ size_t offset = bufsize;
+
+ if (offset + length <= bufcapacity)
+ {
+ memcpy(buffer + offset, data, length * sizeof(char_t));
+ bufsize = offset + length;
+ }
+ else
+ {
+ write_direct(data, length);
+ }
+ }
+
+ // Appends a zero-terminated string. Characters are copied until the string ends or the buffer is
+ // full; in the latter case the copied part is trimmed back to a complete codepoint boundary and
+ // the remainder goes through write_direct.
+ void write_string(const char_t* data)
+ {
+ // write the part of the string that fits in the buffer
+ size_t offset = bufsize;
+
+ while (*data && offset < bufcapacity)
+ buffer[offset++] = *data++;
+
+ // write the rest
+ if (offset < bufcapacity)
+ {
+ bufsize = offset;
+ }
+ else
+ {
+ // backtrack a bit if we have split the codepoint
+ size_t length = offset - bufsize;
+ size_t extra = length - get_valid_length(data - length, length);
+
+ bufsize = offset - extra;
+
+ write_direct(data - extra, strlength(data) + extra);
+ }
+ }
+
+ // The write() overloads below append 1 to 6 characters, flushing first when they would not fit.
+ void write(char_t d0)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 1) offset = flush();
+
+ buffer[offset + 0] = d0;
+ bufsize = offset + 1;
+ }
+
+ void write(char_t d0, char_t d1)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 2) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ bufsize = offset + 2;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 3) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ bufsize = offset + 3;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2, char_t d3)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 4) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ buffer[offset + 3] = d3;
+ bufsize = offset + 4;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 5) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ buffer[offset + 3] = d3;
+ buffer[offset + 4] = d4;
+ bufsize = offset + 5;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 6) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ buffer[offset + 3] = d3;
+ buffer[offset + 4] = d4;
+ buffer[offset + 5] = d5;
+ bufsize = offset + 6;
+ }
+
+ // utf8 maximum expansion: x4 (-> utf32)
+ // utf16 maximum expansion: x2 (-> utf32)
+ // utf32 maximum expansion: x1
+ // bufcapacity is the number of char_t slots: the byte budget is divided by sizeof(char_t) + 4,
+ // i.e. one buffered character plus 4 bytes of scratch space per character.
+ enum
+ {
+ bufcapacitybytes =
+ #ifdef PUGIXML_MEMORY_OUTPUT_STACK
+ PUGIXML_MEMORY_OUTPUT_STACK
+ #else
+ 10240
+ #endif
+ ,
+ bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
+ };
+
+ // pending output in the native char_t representation
+ char_t buffer[bufcapacity];
+
+ // conversion target; every member spans 4 * bufcapacity bytes except data_char
+ union
+ {
+ uint8_t data_u8[4 * bufcapacity];
+ uint16_t data_u16[2 * bufcapacity];
+ uint32_t data_u32[bufcapacity];
+ char_t data_char[bufcapacity];
+ } scratch;
+
+ xml_writer& writer; // destination of all output
+ size_t bufsize; // number of characters currently held in `buffer`
+ xml_encoding encoding; // target encoding, as returned by get_write_encoding
+ };
+
+ // Writes the zero-terminated string `s`, replacing '&', '<', '>' and '"' with the entities
+ // &amp; &lt; &gt; &quot; and any other character selected by `type` with a decimal character
+ // reference (&#NN;). Runs of ordinary characters are forwarded unchanged via write_buffer.
+ PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
+ {
+ while (*s)
+ {
+ const char_t* prev = s;
+
+ // While *s is a usual symbol
+ // NOTE(review): PUGI__SCANWHILE_UNROLL is defined elsewhere; presumably it advances s (using `ss`
+ // as the character under test) while the condition holds - confirm against the macro definition
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
+
+ writer.write_buffer(prev, static_cast<size_t>(s - prev));
+
+ switch (*s)
+ {
+ case 0: break;
+ case '&':
+ writer.write('&', 'a', 'm', 'p', ';');
+ ++s;
+ break;
+ case '<':
+ writer.write('&', 'l', 't', ';');
+ ++s;
+ break;
+ case '>':
+ writer.write('&', 'g', 't', ';');
+ ++s;
+ break;
+ case '"':
+ writer.write('&', 'q', 'u', 'o', 't', ';');
+ ++s;
+ break;
+ default: // s is not a usual symbol
+ {
+ unsigned int ch = static_cast<unsigned int>(*s++);
+ // the two-digit formatting below is only valid for values below 100; the assert narrows it to control characters
+ assert(ch < 32);
+
+ writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
+ }
+ }
+ }
+ }
+
+ // Writes `s` verbatim when format_no_escapes is set in `flags`; otherwise escapes it
+ // according to `type`.
+ PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
+ {
+     if (flags & format_no_escapes)
+     {
+         writer.write_string(s);
+         return;
+     }
+
+     text_output_escaped(writer, s, type);
+ }
+
+ // Writes `s` as one or more CDATA sections. Because "]]>" would close a section early, the text
+ // is split after "]]" and the '>' starts the following section. An empty string still produces
+ // a single empty section.
+ PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
+ {
+     do
+     {
+         writer.write('<', '!', '[', 'C', 'D');
+         writer.write('A', 'T', 'A', '[');
+
+         const char_t* section = s;
+
+         // advance to the end of the text or to the next "]]>" terminator
+         for (; *s; ++s)
+         {
+             if (s[0] == ']' && s[1] == ']' && s[2] == '>') break;
+         }
+
+         // keep "]]" in this section; the '>' is emitted by the next one
+         if (*s) s += 2;
+
+         writer.write_buffer(section, static_cast<size_t>(s - section));
+
+         writer.write(']', ']', '>');
+     }
+     while (*s);
+ }
+
+ // Writes the indent string `depth` times. Indent strings of 1 to 4 characters use the fixed-arity
+ // write() overloads; any other length goes through write_buffer.
+ PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
+ {
+     unsigned int remaining = depth;
+
+     switch (indent_length)
+     {
+     case 1:
+         for (; remaining != 0; --remaining)
+             writer.write(indent[0]);
+         break;
+
+     case 2:
+         for (; remaining != 0; --remaining)
+             writer.write(indent[0], indent[1]);
+         break;
+
+     case 3:
+         for (; remaining != 0; --remaining)
+             writer.write(indent[0], indent[1], indent[2]);
+         break;
+
+     case 4:
+         for (; remaining != 0; --remaining)
+             writer.write(indent[0], indent[1], indent[2], indent[3]);
+         break;
+
+     default:
+         for (; remaining != 0; --remaining)
+             writer.write_buffer(indent, indent_length);
+         break;
+     }
+ }
+
+ // Writes `s` as an XML comment. A '-' that is followed by another '-' or that ends the text is
+ // emitted as "- " so the body never contains "--" and never ends with '-'.
+ PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
+ {
+     writer.write('<', '!', '-', '-');
+
+     while (*s)
+     {
+         const char_t* run = s;
+
+         // find the next '-' that cannot be written as is
+         for (; *s; ++s)
+         {
+             if (s[0] == '-' && (s[1] == '-' || s[1] == 0)) break;
+         }
+
+         writer.write_buffer(run, static_cast<size_t>(s - run));
+
+         if (!*s) break;
+
+         assert(*s == '-');
+
+         writer.write('-', ' ');
+         ++s;
+     }
+
+     writer.write('-', '-', '>');
+ }
+
+ // Writes every attribute of `node` as ` name="value"`. Attributes without a name are written as
+ // ":anonymous"; attributes without a value get empty quotes.
+ PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
+ {
+     const char_t* fallback = PUGIXML_TEXT(":anonymous");
+
+     xml_attribute_struct* attr = node->first_attribute;
+
+     while (attr)
+     {
+         const char_t* attr_name = attr->name ? attr->name : fallback;
+
+         writer.write(' ');
+         writer.write_string(attr_name);
+         writer.write('=', '"');
+
+         if (attr->value)
+             text_output(writer, attr->value, ctx_special_attr, flags);
+
+         writer.write('"');
+
+         attr = attr->next_attribute;
+     }
+ }
+
+ // Writes the opening tag of element `node` including its attributes.
+ // Returns true when the caller still has to write the children and the closing tag; returns
+ // false when the element was written completely (self-closing, or, in non-raw mode, an element
+ // whose only child is a single pcdata/cdata node, which is written inline).
+ PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
+ {
+     const char_t* name = node->name ? node->name : PUGIXML_TEXT(":anonymous");
+
+     writer.write('<');
+     writer.write_string(name);
+
+     if (node->first_attribute)
+         node_output_attributes(writer, node, flags);
+
+     const bool raw = (flags & format_raw) != 0;
+     xml_node_struct* first = node->first_child;
+
+     // no children: self-closing tag
+     if (!first)
+     {
+         if (raw)
+             writer.write(' ', '/', '>');
+         else
+             writer.write(' ', '/', '>', '\n');
+
+         return false;
+     }
+
+     // raw mode never inlines text, children follow immediately
+     if (raw)
+     {
+         writer.write('>');
+
+         return true;
+     }
+
+     const bool single_text_child = !first->next_sibling && (PUGI__NODETYPE(first) == node_pcdata || PUGI__NODETYPE(first) == node_cdata);
+
+     if (!single_text_child)
+     {
+         writer.write('>', '\n');
+
+         return true;
+     }
+
+     // lone text child: keep <name>text</name> on one line
+     writer.write('>');
+
+     const char_t* value = first->value ? first->value : PUGIXML_TEXT("");
+
+     if (PUGI__NODETYPE(first) == node_pcdata)
+         text_output(writer, value, ctx_special_pcdata, flags);
+     else
+         text_output_cdata(writer, value);
+
+     writer.write('<', '/');
+     writer.write_string(name);
+     writer.write('>', '\n');
+
+     return false;
+ }
+
+ // Writes the closing tag of element `node`, followed by a newline unless format_raw is set.
+ // A node without a name is closed as ":anonymous".
+ PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
+ {
+     const char_t* name = node->name;
+
+     if (!name) name = PUGIXML_TEXT(":anonymous");
+
+     writer.write('<', '/');
+     writer.write_string(name);
+
+     if ((flags & format_raw) == 0)
+         writer.write('>', '\n');
+     else
+         writer.write('>');
+ }
+
+ // Writes a node that has no children of its own to traverse: pcdata, cdata, comment, processing
+ // instruction, declaration or doctype. Each is followed by a newline unless format_raw is set.
+ // Any other node type triggers an assertion and produces no output.
+ PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
+ {
+     const char_t* default_name = PUGIXML_TEXT(":anonymous");
+
+     switch (PUGI__NODETYPE(node))
+     {
+     case node_pcdata:
+         text_output(writer, node->value ? node->value : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
+         break;
+
+     case node_cdata:
+         text_output_cdata(writer, node->value ? node->value : PUGIXML_TEXT(""));
+         break;
+
+     case node_comment:
+         node_output_comment(writer, node->value ? node->value : PUGIXML_TEXT(""));
+         break;
+
+     case node_pi:
+         writer.write('<', '?');
+         writer.write_string(node->name ? node->name : default_name);
+
+         if (node->value)
+         {
+             writer.write(' ');
+             writer.write_string(node->value);
+         }
+
+         writer.write('?', '>');
+         break;
+
+     case node_declaration:
+         writer.write('<', '?');
+         writer.write_string(node->name ? node->name : default_name);
+         node_output_attributes(writer, node, flags);
+         writer.write('?', '>');
+         break;
+
+     case node_doctype:
+         writer.write('<', '!', 'D', 'O', 'C');
+         writer.write('T', 'Y', 'P', 'E');
+
+         if (node->value)
+         {
+             writer.write(' ');
+             writer.write_string(node->value);
+         }
+
+         writer.write('>');
+         break;
+
+     default:
+         assert(!"Invalid node type");
+         return; // nothing was written, so no trailing newline either
+     }
+
+     // every handled node type ends its line in non-raw mode
+     if ((flags & format_raw) == 0) writer.write('\n');
+ }
+
+ // Serializes the subtree rooted at `root` without recursion, walking it depth-first through the
+ // first_child / next_sibling / parent links. `depth` is the indentation level of `root`;
+ // indentation is only emitted when format_indent is set and format_raw is not.
+ PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
+ {
+ // zero length disables indentation entirely
+ size_t indent_length = ((flags & (format_indent | format_raw)) == format_indent) ? strlength(indent) : 0;
+
+ xml_node_struct* node = root;
+
+ do
+ {
+ assert(node);
+
+ // begin writing current node
+ if (indent_length)
+ text_output_indent(writer, indent, indent_length, depth);
+
+ if (PUGI__NODETYPE(node) == node_element)
+ {
+ // node_output_start returns true when children and the closing tag are still pending
+ if (node_output_start(writer, node, flags))
+ {
+ node = node->first_child;
+ depth++;
+ continue;
+ }
+ }
+ else if (PUGI__NODETYPE(node) == node_document)
+ {
+ // a document produces no output of its own and does not increase the depth
+ if (node->first_child)
+ {
+ node = node->first_child;
+ continue;
+ }
+ }
+ else
+ {
+ node_output_simple(writer, node, flags);
+ }
+
+ // continue to the next node
+ while (node != root)
+ {
+ if (node->next_sibling)
+ {
+ node = node->next_sibling;
+ break;
+ }
+
+ // no more siblings: climb to the parent, which is now finished
+ node = node->parent;
+
+ // write closing node
+ if (PUGI__NODETYPE(node) == node_element)
+ {
+ depth--;
+
+ if (indent_length)
+ text_output_indent(writer, indent, indent_length, depth);
+
+ node_output_end(writer, node, flags);
+ }
+ }
+ }
+ while (node != root);
+ }
+
+ // Reports whether a declaration node appears among the children of `node` before the first
+ // element child. Scanning stops at the first element.
+ PUGI__FN bool has_declaration(xml_node_struct* node)
+ {
+     xml_node_struct* child = node->first_child;
+
+     while (child)
+     {
+         xml_node_type kind = PUGI__NODETYPE(child);
+
+         if (kind == node_declaration) return true;
+         if (kind == node_element) break;
+
+         child = child->next_sibling;
+     }
+
+     return false;
+ }
+
+ // Reports whether `attr` is one of the attributes in the attribute list of `node`.
+ PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
+ {
+     xml_attribute_struct* current = node->first_attribute;
+
+     while (current && current != attr)
+         current = current->next_attribute;
+
+     // the walk stops on the match, or on null when the list is exhausted
+     return current != 0;
+ }
+
+ // Only element and declaration nodes may receive attributes.
+ PUGI__FN bool allow_insert_attribute(xml_node_type parent)
+ {
+     switch (parent)
+     {
+     case node_element:
+     case node_declaration:
+         return true;
+
+     default:
+         return false;
+     }
+ }
+
+ // Decides whether a node of type `child` may be inserted under a node of type `parent`:
+ // only documents and elements accept children, document/null nodes can never be children,
+ // and declaration/doctype nodes are only accepted directly under a document.
+ PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
+ {
+     const bool parent_is_document = parent == node_document;
+     const bool parent_is_container = parent_is_document || parent == node_element;
+     const bool child_is_insertable = child != node_document && child != node_null;
+     const bool child_is_document_level = child == node_declaration || child == node_doctype;
+
+     return parent_is_container && child_is_insertable && (parent_is_document || !child_is_document_level);
+ }
+
+ // Decides whether `child` may be moved under `parent`: the node types must be compatible, both
+ // nodes must share the same root, and `parent` must not be `child` itself or one of its
+ // descendants.
+ PUGI__FN bool allow_move(xml_node parent, xml_node child)
+ {
+     // type compatibility
+     if (!allow_insert_child(parent.type(), child.type())) return false;
+
+     // moving between documents is not supported
+     if (parent.root() != child.root()) return false;
+
+     // walk from the new parent up to the root; meeting `child` means a cycle would be created
+     for (xml_node ancestor = parent; ancestor; ancestor = ancestor.parent())
+     {
+         if (ancestor == child) return false;
+     }
+
+     return true;
+ }
+
+ // Copies one string field (name or value, selected by header_mask) from a source node/attribute
+ // to a destination that must currently be empty. When `alloc` is non-null and the source string
+ // does not have the header_mask bit set, the pointer is shared and both headers are flagged with
+ // xml_memory_page_contents_shared_mask; otherwise the string is duplicated with strcpy_insitu.
+ PUGI__FN void node_copy_string(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char_t* source, uintptr_t& source_header, xml_allocator* alloc)
+ {
+     assert(!dest && (header & header_mask) == 0);
+
+     if (!source) return;
+
+     const bool shareable = alloc && (source_header & header_mask) == 0;
+
+     if (!shareable)
+     {
+         strcpy_insitu(dest, header, header_mask, source);
+         return;
+     }
+
+     dest = source;
+
+     // strcpy_insitu may reuse document buffer memory, so both sides must be marked as shared
+     header |= xml_memory_page_contents_shared_mask;
+     source_header |= xml_memory_page_contents_shared_mask;
+ }
+
+ // Copies name, value and all attributes of `sn` into `dn` (children are not touched).
+ // `shared_alloc` is passed through to node_copy_string for every copied string. Attributes that
+ // cannot be allocated are silently skipped.
+ PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
+ {
+     node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
+     node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
+
+     xml_attribute_struct* src_attr = sn->first_attribute;
+
+     while (src_attr)
+     {
+         xml_attribute_struct* dst_attr = append_new_attribute(dn, get_allocator(dn));
+
+         if (dst_attr)
+         {
+             node_copy_string(dst_attr->name, dst_attr->header, xml_memory_page_name_allocated_mask, src_attr->name, src_attr->header, shared_alloc);
+             node_copy_string(dst_attr->value, dst_attr->header, xml_memory_page_value_allocated_mask, src_attr->value, src_attr->header, shared_alloc);
+         }
+
+         src_attr = src_attr->next_attribute;
+     }
+ }
+
+ // Copies the contents of `sn` and its whole subtree into `dn` without recursion.
+ // `sit` walks the source subtree depth-first while `dit` tracks the destination node that
+ // corresponds to the parent of `sit`.
+ PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
+ {
+ xml_allocator& alloc = get_allocator(dn);
+ // strings are only shared when source and destination use the same allocator
+ xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
+
+ node_copy_contents(dn, sn, shared_alloc);
+
+ xml_node_struct* dit = dn;
+ xml_node_struct* sit = sn->first_child;
+
+ while (sit && sit != sn)
+ {
+ // skip the destination node itself if it is encountered inside the source subtree
+ if (sit != dn)
+ {
+ xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
+
+ // if allocation failed, the node (and its subtree) is skipped
+ if (copy)
+ {
+ node_copy_contents(copy, sit, shared_alloc);
+
+ if (sit->first_child)
+ {
+ // descend: the fresh copy becomes the parent for the copied children
+ dit = copy;
+ sit = sit->first_child;
+ continue;
+ }
+ }
+ }
+
+ // continue to the next node
+ do
+ {
+ if (sit->next_sibling)
+ {
+ sit = sit->next_sibling;
+ break;
+ }
+
+ // no sibling left: climb one level in both trees
+ sit = sit->parent;
+ dit = dit->parent;
+ }
+ while (sit != sn);
+ }
+ }
+
+ // True for nodes that carry character data: pcdata and cdata.
+ inline bool is_text_node(xml_node_struct* node)
+ {
+     switch (PUGI__NODETYPE(node))
+     {
+     case node_pcdata:
+     case node_cdata:
+         return true;
+
+     default:
+         return false;
+     }
+ }
+
+ // get value with conversion functions
+ // Chooses the radix for parsing `value` as an integer: 16 when the text (after leading
+ // whitespace and an optional sign) starts with "0x" or "0X", 10 otherwise.
+ // Both '-' and '+' are accepted as sign, matching what strtol/strtoul and their wide
+ // counterparts accept, so that "+0x1A" is treated as hexadecimal just like "-0x1A".
+ PUGI__FN int get_integer_base(const char_t* value)
+ {
+     const char_t* s = value;
+
+     // skip leading whitespace, as the strto* family does
+     while (PUGI__IS_CHARTYPE(*s, ct_space))
+         s++;
+
+     // skip a single optional sign
+     if (*s == '-' || *s == '+')
+         s++;
+
+     return (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) ? 16 : 10;
+ }
+
+ // Parses `value` as a signed integer (decimal or 0x-prefixed hexadecimal); returns `def` when
+ // `value` is null.
+ PUGI__FN int get_value_int(const char_t* value, int def)
+ {
+     if (value == 0) return def;
+
+     const int radix = get_integer_base(value);
+
+ #ifdef PUGIXML_WCHAR_MODE
+     const long parsed = wcstol(value, 0, radix);
+ #else
+     const long parsed = strtol(value, 0, radix);
+ #endif
+
+     return static_cast<int>(parsed);
+ }
+
+ // Parses `value` as an unsigned integer (decimal or 0x-prefixed hexadecimal); returns `def`
+ // when `value` is null.
+ PUGI__FN unsigned int get_value_uint(const char_t* value, unsigned int def)
+ {
+     if (value == 0) return def;
+
+     const int radix = get_integer_base(value);
+
+ #ifdef PUGIXML_WCHAR_MODE
+     const unsigned long parsed = wcstoul(value, 0, radix);
+ #else
+     const unsigned long parsed = strtoul(value, 0, radix);
+ #endif
+
+     return static_cast<unsigned int>(parsed);
+ }
+
+ // Parses `value` as a double; returns `def` when `value` is null.
+ PUGI__FN double get_value_double(const char_t* value, double def)
+ {
+     if (value == 0) return def;
+
+ #ifdef PUGIXML_WCHAR_MODE
+     const double parsed = wcstod(value, 0);
+ #else
+     const double parsed = strtod(value, 0);
+ #endif
+
+     return parsed;
+ }
+
+ // Parses `value` as a double and narrows the result to float; returns `def` when `value` is null.
+ PUGI__FN float get_value_float(const char_t* value, float def)
+ {
+     if (value == 0) return def;
+
+ #ifdef PUGIXML_WCHAR_MODE
+     const double parsed = wcstod(value, 0);
+ #else
+     const double parsed = strtod(value, 0);
+ #endif
+
+     return static_cast<float>(parsed);
+ }
+
+ // Interprets `value` as a boolean by looking at its first character only: '1', 't', 'T', 'y'
+ // and 'Y' mean true, anything else (including an empty string) means false.
+ // Returns `def` when `value` is null.
+ PUGI__FN bool get_value_bool(const char_t* value, bool def)
+ {
+     if (value == 0) return def;
+
+     switch (*value)
+     {
+     case '1': // 1*
+     case 't': // t* (true)
+     case 'T': // T* (True)
+     case 'y': // y* (yes)
+     case 'Y': // Y* (YES)
+         return true;
+
+     default:
+         return false;
+     }
+ }
+
+#ifdef PUGIXML_HAS_LONG_LONG
+ // Parses `value` as a signed 64-bit integer (decimal or 0x-prefixed hexadecimal); returns `def`
+ // when `value` is null. Uses the MSVC CRT functions when PUGI__MSVC_CRT_VERSION is defined.
+ PUGI__FN long long get_value_llong(const char_t* value, long long def)
+ {
+     if (value == 0) return def;
+
+     const int radix = get_integer_base(value);
+
+ #if defined(PUGIXML_WCHAR_MODE) && defined(PUGI__MSVC_CRT_VERSION)
+     return _wcstoi64(value, 0, radix);
+ #elif defined(PUGIXML_WCHAR_MODE)
+     return wcstoll(value, 0, radix);
+ #elif defined(PUGI__MSVC_CRT_VERSION)
+     return _strtoi64(value, 0, radix);
+ #else
+     return strtoll(value, 0, radix);
+ #endif
+ }
+
+ // Parses `value` as an unsigned 64-bit integer (decimal or 0x-prefixed hexadecimal); returns
+ // `def` when `value` is null. Uses the MSVC CRT functions when PUGI__MSVC_CRT_VERSION is defined.
+ PUGI__FN unsigned long long get_value_ullong(const char_t* value, unsigned long long def)
+ {
+     if (value == 0) return def;
+
+     const int radix = get_integer_base(value);
+
+ #if defined(PUGIXML_WCHAR_MODE) && defined(PUGI__MSVC_CRT_VERSION)
+     return _wcstoui64(value, 0, radix);
+ #elif defined(PUGIXML_WCHAR_MODE)
+     return wcstoull(value, 0, radix);
+ #elif defined(PUGI__MSVC_CRT_VERSION)
+     return _strtoui64(value, 0, radix);
+ #else
+     return strtoull(value, 0, radix);
+ #endif
+ }
+#endif
+
+ // set value with conversion functions
+ // Stores the zero-terminated ASCII text in `buf` into `dest` via strcpy_insitu, widening it to
+ // char_t first in wchar mode. Returns the result of strcpy_insitu.
+ PUGI__FN bool set_value_buffer(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128])
+ {
+ #ifndef PUGIXML_WCHAR_MODE
+     return strcpy_insitu(dest, header, header_mask, buf);
+ #else
+     char_t widened[128];
+     impl::widen_ascii(widened, buf);
+
+     return strcpy_insitu(dest, header, header_mask, widened);
+ #endif
+ }
+
+ // Formats a signed int in decimal ("%d") and stores the text into `dest`.
+ PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, int value)
+ {
+     char text[128];
+     sprintf(text, "%d", value);
+
+     const bool stored = set_value_buffer(dest, header, header_mask, text);
+
+     return stored;
+ }
+
+ // Formats an unsigned int in decimal ("%u") and stores the text into `dest`.
+ PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int value)
+ {
+     char text[128];
+     sprintf(text, "%u", value);
+
+     const bool stored = set_value_buffer(dest, header, header_mask, text);
+
+     return stored;
+ }
+
+ // Formats a float with 9 significant digits ("%.9g") and stores the text into `dest`.
+ PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, float value)
+ {
+     char text[128];
+     sprintf(text, "%.9g", value);
+
+     const bool stored = set_value_buffer(dest, header, header_mask, text);
+
+     return stored;
+ }
+
+ // Formats a double with 17 significant digits ("%.17g") and stores the text into `dest`.
+ PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, double value)
+ {
+     char text[128];
+     sprintf(text, "%.17g", value);
+
+     const bool stored = set_value_buffer(dest, header, header_mask, text);
+
+     return stored;
+ }
+
+ // Stores the literal "true" or "false" into `dest`.
+ PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, bool value)
+ {
+     if (value)
+         return strcpy_insitu(dest, header, header_mask, PUGIXML_TEXT("true"));
+
+     return strcpy_insitu(dest, header, header_mask, PUGIXML_TEXT("false"));
+ }
+
+#ifdef PUGIXML_HAS_LONG_LONG
+ // Formats a long long in decimal ("%lld") and stores the text into `dest`.
+ PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, long long value)
+ {
+     char text[128];
+     sprintf(text, "%lld", value);
+
+     const bool stored = set_value_buffer(dest, header, header_mask, text);
+
+     return stored;
+ }
+
+ // Formats an unsigned long long in decimal ("%llu") and stores the text into `dest`.
+ PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned long long value)
+ {
+     char text[128];
+     sprintf(text, "%llu", value);
+
+     const bool stored = set_value_buffer(dest, header, header_mask, text);
+
+     return stored;
+ }
+#endif
+
+ // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
+ // Determines the size of `file` by seeking to its end, reading the position and seeking back to
+ // the beginning (the file position is left at offset 0).
+ // On success stores the size in out_result and returns status_ok. Returns status_io_error when
+ // the reported position is negative and status_out_of_memory when the size does not survive a
+ // round trip through size_t. The return values of the seek calls themselves are not checked.
+ PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
+ {
+ #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
+ // there are 64-bit versions of fseek/ftell, let's use them
+ typedef __int64 length_type;
+
+ _fseeki64(file, 0, SEEK_END);
+ length_type length = _ftelli64(file);
+ _fseeki64(file, 0, SEEK_SET);
+ #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
+ // there are 64-bit versions of fseek/ftell, let's use them
+ typedef off64_t length_type;
+
+ fseeko64(file, 0, SEEK_END);
+ length_type length = ftello64(file);
+ fseeko64(file, 0, SEEK_SET);
+ #else
+ // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
+ typedef long length_type;
+
+ fseek(file, 0, SEEK_END);
+ length_type length = ftell(file);
+ fseek(file, 0, SEEK_SET);
+ #endif
+
+ // check for I/O errors
+ if (length < 0) return status_io_error;
+
+ // check for overflow
+ size_t result = static_cast<size_t>(length);
+
+ // if size_t is narrower than length_type the cast truncates and the comparison fails
+ if (static_cast<length_type>(result) != length) return status_out_of_memory;
+
+ // finalize
+ out_result = result;
+
+ return status_ok;
+ }
+
+ // Appends a zero terminator to `buffer` when its contents will be used without an encoding
+ // conversion (a conversion produces its own terminator), and returns the resulting size in
+ // bytes. The write lands at or after byte offset `size`, so the allocation must be larger than
+ // `size`. In wchar mode `size` is first rounded down to whole char_t units.
+ PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
+ {
+ #ifdef PUGIXML_WCHAR_MODE
+     const xml_encoding native = get_wchar_encoding();
+     const bool used_in_place = encoding == native || need_endian_swap_utf(encoding, native);
+
+     if (used_in_place)
+     {
+         char_t* chars = static_cast<char_t*>(buffer);
+         const size_t count = size / sizeof(char_t);
+
+         chars[count] = 0;
+
+         return (count + 1) * sizeof(char_t);
+     }
+ #else
+     if (encoding == encoding_utf8)
+     {
+         char* bytes = static_cast<char*>(buffer);
+
+         bytes[size] = 0;
+
+         return size + 1;
+     }
+ #endif
+
+     // a conversion will take place later: leave the buffer untouched
+     return size;
+ }
+
+ // Reads the whole of `file` into a buffer obtained from xml_memory::allocate and hands that
+ // buffer to doc.load_buffer_inplace_own. The file is always closed before returning (unless it
+ // is null, which yields status_file_not_found).
+ PUGI__FN xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)
+ {
+     if (!file) return make_parse_result(status_file_not_found);
+
+     // determine how much to read (may fail with an I/O error)
+     size_t file_size = 0;
+     const xml_parse_status size_status = get_file_size(file, file_size);
+
+     if (size_status != status_ok)
+     {
+         fclose(file);
+         return make_parse_result(size_status);
+     }
+
+     // room for the file plus one char_t for the zero terminator
+     const size_t terminator_room = sizeof(char_t);
+     char* data = static_cast<char*>(xml_memory::allocate(file_size + terminator_room));
+
+     if (!data)
+     {
+         fclose(file);
+         return make_parse_result(status_out_of_memory);
+     }
+
+     // pull the file into memory; the handle is no longer needed afterwards
+     const size_t bytes_read = fread(data, 1, file_size, file);
+     fclose(file);
+
+     if (bytes_read != file_size)
+     {
+         xml_memory::deallocate(data);
+         return make_parse_result(status_io_error);
+     }
+
+     const xml_encoding detected = get_buffer_encoding(encoding, data, file_size);
+     const size_t terminated_size = zero_terminate_buffer(data, file_size, detected);
+
+     return doc.load_buffer_inplace_own(data, terminated_size, options, detected);
+ }
+
+#ifndef PUGIXML_NO_STL
+ // One link of a singly linked list of fixed-size blocks used to collect stream data whose total
+ // length is not known in advance. `size` is the number of bytes of `data` that are in use.
+ template <typename T> struct xml_stream_chunk
+ {
+     // Allocates a chunk with xml_memory::allocate and constructs it in place.
+     // Returns 0 when the allocation fails; constructing into a null pointer would be invalid.
+     static xml_stream_chunk* create()
+     {
+         void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
+         if (!memory) return 0;
+
+         return new (memory) xml_stream_chunk();
+     }
+
+     // Releases the chunk `ptr` and every chunk linked after it. Accepts a null pointer.
+     static void destroy(void* ptr)
+     {
+         xml_stream_chunk* chunk = static_cast<xml_stream_chunk*>(ptr);
+
+         // free chunk chain
+         while (chunk)
+         {
+             // read the link before the memory is released
+             xml_stream_chunk* next_ = chunk->next;
+
+             xml_memory::deallocate(chunk);
+
+             chunk = next_;
+         }
+     }
+
+     xml_stream_chunk(): next(0), size(0)
+     {
+     }
+
+     xml_stream_chunk* next; // following chunk, or 0 for the last one
+     size_t size;            // bytes of `data` in use
+
+     T data[xml_memory_page_size / sizeof(T)];
+ };
+
+ // Reads `stream` until EOF without seeking: data is collected into a linked list of chunks and
+ // then copied into one contiguous buffer from xml_memory::allocate, which is returned through
+ // out_buffer together with its size in bytes (out_size). The buffer has sizeof(char_t) spare
+ // bytes beyond out_size. Returns status_io_error on stream errors and status_out_of_memory on
+ // allocation failure or size overflow.
+ template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
+ {
+ // NOTE(review): buffer_holder is defined elsewhere; presumably it calls the given destroy
+ // function on `data` when it goes out of scope - confirm
+ buffer_holder chunks(0, xml_stream_chunk<T>::destroy);
+
+ // read file to a chunk list
+ size_t total = 0;
+ xml_stream_chunk<T>* last = 0;
+
+ while (!stream.eof())
+ {
+ // allocate new chunk
+ xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
+ if (!chunk) return status_out_of_memory;
+
+ // append chunk to list
+ if (last) last = last->next = chunk;
+ else chunks.data = last = chunk;
+
+ // read data to chunk
+ stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
+ chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
+
+ // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
+ if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
+
+ // guard against huge files (chunk size is small enough to make this overflow check work)
+ if (total + chunk->size < total) return status_out_of_memory;
+ total += chunk->size;
+ }
+
+ // extra room so that a zero terminator can be appended later
+ size_t max_suffix_size = sizeof(char_t);
+
+ // copy chunk list to a contiguous buffer
+ char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
+ if (!buffer) return status_out_of_memory;
+
+ char* write = buffer;
+
+ for (xml_stream_chunk<T>* chunk = static_cast<xml_stream_chunk<T>*>(chunks.data); chunk; chunk = chunk->next)
+ {
+ assert(write + chunk->size <= buffer + total);
+ memcpy(write, chunk->data, chunk->size);
+ write += chunk->size;
+ }
+
+ assert(write == buffer + total);
+
+ // return buffer
+ *out_buffer = buffer;
+ *out_size = total;
+
+ return status_ok;
+ }
+
+ // Reads the rest of a seekable `stream` in one go: the remaining length is measured with
+ // tellg/seekg, a buffer of that many T units (plus sizeof(char_t) spare bytes) is allocated and
+ // filled with a single read. The buffer and the number of bytes actually read are returned
+ // through out_buffer / out_size. Returns status_io_error on stream errors and
+ // status_out_of_memory on allocation failure or when the length does not fit.
+ template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
+ {
+ // get length of remaining data in stream
+ typename std::basic_istream<T>::pos_type pos = stream.tellg();
+ stream.seekg(0, std::ios::end);
+ std::streamoff length = stream.tellg() - pos;
+ // restore the original read position
+ stream.seekg(pos);
+
+ if (stream.fail() || pos < 0) return status_io_error;
+
+ // guard against huge files
+ size_t read_length = static_cast<size_t>(length);
+
+ if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
+
+ // extra room so that a zero terminator can be appended later
+ size_t max_suffix_size = sizeof(char_t);
+
+ // read stream data into memory (guard against stream exceptions with buffer holder)
+ buffer_holder buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
+ if (!buffer.data) return status_out_of_memory;
+
+ stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
+
+ // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
+ if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
+
+ // return buffer
+ size_t actual_length = static_cast<size_t>(stream.gcount());
+ assert(actual_length <= read_length);
+
+ // ownership of the memory passes to the caller
+ *out_buffer = buffer.release();
+ *out_size = actual_length * sizeof(T);
+
+ return status_ok;
+ }
+
+ // Loads the remaining contents of `stream` into memory and parses them into `doc`.
+ // Seekable streams are read in one pass; streams for which tellg() fails are read chunk by chunk.
+ template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)
+ {
+     // a stream that is already in a failed state is rejected up front; otherwise a failing
+     // tellg() below would be indistinguishable and its error bits would be cleared
+     if (stream.fail()) return make_parse_result(status_io_error);
+
+     void* data = 0;
+     size_t data_size = 0;
+     xml_parse_status load_status = status_ok;
+
+     // prefer the seek-based reader: it is faster and needs less memory
+     const bool seekable = !(stream.tellg() < 0);
+
+     if (seekable)
+     {
+         load_status = load_stream_data_seek(stream, &data, &data_size);
+     }
+     else
+     {
+         stream.clear(); // clear error flags that could be set by a failing tellg
+         load_status = load_stream_data_noseek(stream, &data, &data_size);
+     }
+
+     if (load_status != status_ok) return make_parse_result(load_status);
+
+     const xml_encoding detected = get_buffer_encoding(encoding, data, data_size);
+     const size_t terminated_size = zero_terminate_buffer(data, data_size, detected);
+
+     return doc.load_buffer_inplace_own(data, terminated_size, options, detected);
+ }
+#endif
+
+#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
+ // Opens a file given a wide-character path and mode by forwarding to _wfopen.
+ PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
+ {
+     FILE* handle = _wfopen(path, mode);
+
+     return handle;
+ }
+#else
+ // Converts the wide string `str` to UTF-8 in a buffer obtained from xml_memory::allocate.
+ // Returns 0 when the allocation fails; otherwise the caller must release the result with
+ // xml_memory::deallocate.
+ PUGI__FN char* convert_path_heap(const wchar_t* str)
+ {
+     assert(str);
+
+     // pass 1: measure the source and the UTF-8 size it needs
+     const size_t wide_length = strlength_wide(str);
+     const size_t utf8_size = as_utf8_begin(str, wide_length);
+
+     // one extra byte beyond the UTF-8 payload
+     void* memory = xml_memory::allocate(utf8_size + 1);
+     if (!memory) return 0;
+
+     char* utf8 = static_cast<char*>(memory);
+
+     // pass 2: perform the conversion
+     as_utf8_end(utf8, utf8_size, str, wide_length);
+
+     return utf8;
+ }
+
+ // Opens a file given a wide-character path and mode on platforms without _wfopen: the path is
+ // converted to UTF-8 and the mode is narrowed to ASCII before calling fopen.
+ // Returns 0 when the path cannot be converted, when the mode has more than 3 characters (it
+ // would not fit the local buffer), or when fopen fails.
+ PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
+ {
+     // there is no standard function to open wide paths, so our best bet is to try utf8 path
+     char* path_utf8 = convert_path_heap(path);
+     if (!path_utf8) return 0;
+
+     // convert mode to ASCII (we mirror _wfopen interface); the copy is bounded so that the
+     // zero-initialized buffer always keeps its terminator
+     char mode_ascii[4] = {0};
+     size_t i = 0;
+
+     for (; mode[i] && i + 1 < sizeof(mode_ascii); ++i) mode_ascii[i] = static_cast<char>(mode[i]);
+
+     // a mode that did not fit is rejected instead of being silently truncated
+     FILE* result = mode[i] ? 0 : fopen(path_utf8, mode_ascii);
+
+     // free dummy buffer
+     xml_memory::deallocate(path_utf8);
+
+     return result;
+ }
+#endif
+
+ // Writes `doc` to `file` and closes the file.
+ // Returns false when `file` is null, when the stream's error indicator is set after saving,
+ // or when closing the file fails; returns true otherwise. A non-null `file` is always closed.
+ PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
+ {
+ if (!file) return false;
+
+ xml_writer_file writer(file);
+ doc.save(writer, indent, flags, encoding);
+
+ // sample the error indicator before closing, since the FILE must not be used after fclose
+ bool write_ok = ferror(file) == 0;
+
+ // fclose flushes data still sitting in the stdio buffer, so a failure of that final
+ // write is only visible through its return value
+ bool close_ok = fclose(file) == 0;
+
+ return write_ok && close_ok;
+ }
+
+ // Shared implementation for loading a document (or fragment) from a memory buffer.
+ // Converts `contents` to a char_t buffer via convert_buffer, parses it under `root`, and records
+ // the detected encoding in the returned result.
+ // - is_mutable is forwarded to convert_buffer.
+ // - own: when true, `contents` is released here if conversion produced a different buffer, and the
+ //   parsed buffer is always reported through `out_buffer`.
+ // - out_buffer is written only when `own` is set or a converted buffer was produced.
+ // Returns a status_out_of_memory result when convert_buffer fails.
+ PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
+ {
+ // check input buffer
+ assert(contents || size == 0);
+
+ // get actual encoding
+ xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
+
+ // get private buffer
+ char_t* buffer = 0;
+ size_t length = 0;
+
+ if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
+
+ // delete original buffer if we performed a conversion
+ if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
+
+ // store buffer for offset_debug
+ doc->buffer = buffer;
+
+ // parse
+ xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
+
+ // remember encoding
+ res.encoding = buffer_encoding;
+
+ // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
+ if (own || buffer != contents) *out_buffer = buffer;
+
+ return res;
+ }
+PUGI__NS_END
+
+namespace pugi
+{
+ // Stores the opaque file handle; write() later casts it to FILE*.
+ PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
+ {
+ }
+
+ // Writes `size` bytes from `data` to the stored FILE* with fwrite.
+ // The fwrite result is deliberately discarded; callers that care must inspect the stream state themselves.
+ PUGI__FN void xml_writer_file::write(const void* data, size_t size)
+ {
+ size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
+ (void)!result; // unfortunately we can't do proper error handling here
+ }
+
+#ifndef PUGIXML_NO_STL
+ // Binds the writer to a narrow (char) output stream; the wide stream pointer stays null.
+ PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
+ {
+ }
+
+ // Binds the writer to a wide (wchar_t) output stream; the narrow stream pointer stays null.
+ PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
+ {
+ }
+
+ // Forwards `size` bytes of `data` to whichever stream this writer was constructed with.
+ // For the wide stream the byte count is converted to a wchar_t count and must divide evenly.
+ PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
+ {
+ if (narrow_stream)
+ {
+ // exactly one of the two stream pointers is expected to be set
+ assert(!wide_stream);
+
+ const char* bytes = reinterpret_cast<const char*>(data);
+ narrow_stream->write(bytes, static_cast<std::streamsize>(size));
+ return;
+ }
+
+ assert(wide_stream);
+ assert(size % sizeof(wchar_t) == 0);
+
+ const wchar_t* wide_chars = reinterpret_cast<const wchar_t*>(data);
+ wide_stream->write(wide_chars, static_cast<std::streamsize>(size / sizeof(wchar_t)));
+ }
+#endif
+
+ // Creates a walker with its depth counter initialized to 0.
+ PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
+ {
+ }
+
+ // Destructor; there is nothing to release here.
+ PUGI__FN xml_tree_walker::~xml_tree_walker()
+ {
+ }
+
+ // Returns the current value of the depth counter.
+ PUGI__FN int xml_tree_walker::depth() const
+ {
+ return _depth;
+ }
+
+ // Default start-of-traversal callback: ignores the node and returns true.
+ PUGI__FN bool xml_tree_walker::begin(xml_node&)
+ {
+ return true;
+ }
+
+ // Default end-of-traversal callback: ignores the node and returns true.
+ PUGI__FN bool xml_tree_walker::end(xml_node&)
+ {
+ return true;
+ }
+
+ // Constructs an empty attribute handle (null internal pointer).
+ PUGI__FN xml_attribute::xml_attribute(): _attr(0)
+ {
+ }
+
+ // Constructs a handle that refers to the given internal attribute structure (may be null).
+ PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
+ {
+ }
+
+ // Dummy function; its address is the non-null value produced by xml_attribute's conversion to unspecified_bool_type.
+ PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
+ {
+ }
+
+ // Boolean-context conversion: yields a non-null pointer for a non-empty handle and null otherwise.
+ PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
+ {
+ if (_attr) return unspecified_bool_xml_attribute;
+
+ return 0;
+ }
+
+ // Returns true when the handle is empty.
+ PUGI__FN bool xml_attribute::operator!() const
+ {
+ return !_attr;
+ }
+
+ // Identity comparison: true when both handles refer to the same internal attribute.
+ PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
+ {
+ return (_attr == r._attr);
+ }
+
+ // Identity comparison: true when the handles refer to different internal attributes.
+ PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
+ {
+ return (_attr != r._attr);
+ }
+
+ // Orders handles by the address of the internal attribute (not by name or value).
+ PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
+ {
+ return (_attr < r._attr);
+ }
+
+ // Orders handles by the address of the internal attribute (not by name or value).
+ PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
+ {
+ return (_attr > r._attr);
+ }
+
+ // Orders handles by the address of the internal attribute (not by name or value).
+ PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
+ {
+ return (_attr <= r._attr);
+ }
+
+ // Orders handles by the address of the internal attribute (not by name or value).
+ PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
+ {
+ return (_attr >= r._attr);
+ }
+
+ // Returns a handle to the following attribute, or an empty handle when this handle is empty.
+ PUGI__FN xml_attribute xml_attribute::next_attribute() const
+ {
+ if (!_attr) return xml_attribute();
+
+ return xml_attribute(_attr->next_attribute);
+ }
+
+ // Returns a handle to the preceding attribute, or an empty handle when there is none.
+ PUGI__FN xml_attribute xml_attribute::previous_attribute() const
+ {
+ if (!_attr) return xml_attribute();
+
+ // a predecessor link whose own next_attribute is null is not reported as a previous attribute
+ xml_attribute_struct* before = _attr->prev_attribute_c;
+ if (!before->next_attribute) return xml_attribute();
+
+ return xml_attribute(before);
+ }
+
+ // Returns the attribute value, or `def` when the handle is empty or the value pointer is null.
+ PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
+ {
+ if (!_attr || !_attr->value) return def;
+
+ return _attr->value;
+ }
+
+ // Converts the attribute value with impl::get_value_int; an empty handle passes a null value together with `def`.
+ PUGI__FN int xml_attribute::as_int(int def) const
+ {
+ const char_t* text = _attr ? _attr->value : 0;
+
+ return impl::get_value_int(text, def);
+ }
+
+ // Converts the attribute value with impl::get_value_uint; an empty handle passes a null value together with `def`.
+ PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
+ {
+ const char_t* text = _attr ? _attr->value : 0;
+
+ return impl::get_value_uint(text, def);
+ }
+
+ // Converts the attribute value with impl::get_value_double; an empty handle passes a null value together with `def`.
+ PUGI__FN double xml_attribute::as_double(double def) const
+ {
+ const char_t* text = _attr ? _attr->value : 0;
+
+ return impl::get_value_double(text, def);
+ }
+
+ // Converts the attribute value with impl::get_value_float; an empty handle passes a null value together with `def`.
+ PUGI__FN float xml_attribute::as_float(float def) const
+ {
+ const char_t* text = _attr ? _attr->value : 0;
+
+ return impl::get_value_float(text, def);
+ }
+
+ // Converts the attribute value with impl::get_value_bool; an empty handle passes a null value together with `def`.
+ PUGI__FN bool xml_attribute::as_bool(bool def) const
+ {
+ const char_t* text = _attr ? _attr->value : 0;
+
+ return impl::get_value_bool(text, def);
+ }
+
+#ifdef PUGIXML_HAS_LONG_LONG
+ // Converts the attribute value with impl::get_value_llong; an empty handle passes a null value together with `def`.
+ PUGI__FN long long xml_attribute::as_llong(long long def) const
+ {
+ const char_t* text = _attr ? _attr->value : 0;
+
+ return impl::get_value_llong(text, def);
+ }
+
+ // Converts the attribute value with impl::get_value_ullong; an empty handle passes a null value together with `def`.
+ PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
+ {
+ const char_t* text = _attr ? _attr->value : 0;
+
+ return impl::get_value_ullong(text, def);
+ }
+#endif
+
+ // Returns true when the handle does not refer to any attribute.
+ PUGI__FN bool xml_attribute::empty() const
+ {
+ return !_attr;
+ }
+
+ // Returns the attribute name, or an empty string when the handle is empty or the name pointer is null.
+ PUGI__FN const char_t* xml_attribute::name() const
+ {
+ if (!_attr || !_attr->name) return PUGIXML_TEXT("");
+
+ return _attr->name;
+ }
+
+ // Returns the attribute value, or an empty string when the handle is empty or the value pointer is null.
+ PUGI__FN const char_t* xml_attribute::value() const
+ {
+ if (!_attr || !_attr->value) return PUGIXML_TEXT("");
+
+ return _attr->value;
+ }
+
+ // Derives a hash from the internal pointer: its address divided by the size of the attribute structure.
+ PUGI__FN size_t xml_attribute::hash_value() const
+ {
+ uintptr_t address = reinterpret_cast<uintptr_t>(_attr);
+
+ return static_cast<size_t>(address / sizeof(xml_attribute_struct));
+ }
+
+ // Exposes the raw internal attribute pointer (null for an empty handle).
+ PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
+ {
+ return _attr;
+ }
+
+ // Assigns a string value via set_value; the success flag of set_value is discarded.
+ PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
+ {
+ set_value(rhs);
+ return *this;
+ }
+
+ // Assigns an int value via set_value; the success flag of set_value is discarded.
+ PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
+ {
+ set_value(rhs);
+ return *this;
+ }
+
+ // Assigns an unsigned int value via set_value; the success flag of set_value is discarded.
+ PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
+ {
+ set_value(rhs);
+ return *this;
+ }
+
+ // Assigns a double value via set_value; the success flag of set_value is discarded.
+ PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
+ {
+ set_value(rhs);
+ return *this;
+ }
+
+ // Assigns a float value via set_value; the success flag of set_value is discarded.
+ PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
+ {
+ set_value(rhs);
+ return *this;
+ }
+
+ // Assigns a bool value via set_value; the success flag of set_value is discarded.
+ PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
+ {
+ set_value(rhs);
+ return *this;
+ }
+
+#ifdef PUGIXML_HAS_LONG_LONG
+ // Assigns a long long value via set_value; the success flag of set_value is discarded.
+ PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
+ {
+ set_value(rhs);
+ return *this;
+ }
+
+ // Assigns an unsigned long long value via set_value; the success flag of set_value is discarded.
+ PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
+ {
+ set_value(rhs);
+ return *this;
+ }
+#endif
+
+ // Replaces the attribute name with a copy of `rhs` via impl::strcpy_insitu.
+ // Returns false for an empty handle; otherwise the result of the copy.
+ PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
+ {
+ return _attr && impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs);
+ }
+
+ // Replaces the attribute value with a copy of `rhs` via impl::strcpy_insitu.
+ // Returns false for an empty handle; otherwise the result of the copy.
+ PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
+ {
+ return _attr && impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+
+ // Stores an int as the attribute value via impl::set_value_convert.
+ // Returns false for an empty handle; otherwise the result of the conversion helper.
+ PUGI__FN bool xml_attribute::set_value(int rhs)
+ {
+ return _attr && impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+
+ // Stores an unsigned int as the attribute value via impl::set_value_convert.
+ // Returns false for an empty handle; otherwise the result of the conversion helper.
+ PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
+ {
+ return _attr && impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+
+ // Stores a double as the attribute value via impl::set_value_convert.
+ // Returns false for an empty handle; otherwise the result of the conversion helper.
+ PUGI__FN bool xml_attribute::set_value(double rhs)
+ {
+ return _attr && impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+
+ // Stores a float as the attribute value via impl::set_value_convert.
+ // Returns false for an empty handle; otherwise the result of the conversion helper.
+ PUGI__FN bool xml_attribute::set_value(float rhs)
+ {
+ return _attr && impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+
+ // Stores a bool as the attribute value via impl::set_value_convert.
+ // Returns false for an empty handle; otherwise the result of the conversion helper.
+ PUGI__FN bool xml_attribute::set_value(bool rhs)
+ {
+ return _attr && impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+
+#ifdef PUGIXML_HAS_LONG_LONG
+ // Stores a long long as the attribute value via impl::set_value_convert.
+ // Returns false for an empty handle; otherwise the result of the conversion helper.
+ PUGI__FN bool xml_attribute::set_value(long long rhs)
+ {
+ return _attr && impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+
+ // Stores an unsigned long long as the attribute value via impl::set_value_convert.
+ // Returns false for an empty handle; otherwise the result of the conversion helper.
+ PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
+ {
+ return _attr && impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+#endif
+
+#ifdef __BORLANDC__
+ // Borland C++ only: explicit logical AND between an attribute handle (converted to bool) and a bool.
+ PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
+ {
+ return (bool)lhs && rhs;
+ }
+
+ // Borland C++ only: explicit logical OR between an attribute handle (converted to bool) and a bool.
+ PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
+ {
+ return (bool)lhs || rhs;
+ }
+#endif
+
+ // Constructs an empty node handle (null internal pointer).
+ PUGI__FN xml_node::xml_node(): _root(0)
+ {
+ }
+
+ // Constructs a handle that refers to the given internal node structure (may be null).
+ PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
+ {
+ }
+
+ // Dummy function; its address is the non-null value produced by xml_node's conversion to unspecified_bool_type.
+ PUGI__FN static void unspecified_bool_xml_node(xml_node***)
+ {
+ }
+
+ // Boolean-context conversion: yields a non-null pointer for a non-empty handle and null otherwise.
+ PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
+ {
+ if (_root) return unspecified_bool_xml_node;
+
+ return 0;
+ }
+
+ // Returns true when the handle is empty.
+ PUGI__FN bool xml_node::operator!() const
+ {
+ return !_root;
+ }
+
+ // Returns an iterator positioned at the first child (a null position for an empty handle), with this node as parent.
+ PUGI__FN xml_node::iterator xml_node::begin() const
+ {
+ xml_node_struct* first = _root ? _root->first_child : 0;
+
+ return iterator(first, _root);
+ }
+
+ // Returns the past-the-end child iterator: a null position with this node as parent.
+ PUGI__FN xml_node::iterator xml_node::end() const
+ {
+ return iterator(0, _root);
+ }
+
+ // Returns an iterator positioned at the first attribute (a null position for an empty handle), with this node as parent.
+ PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
+ {
+ xml_attribute_struct* first = _root ? _root->first_attribute : 0;
+
+ return attribute_iterator(first, _root);
+ }
+
+ // Returns the past-the-end attribute iterator: a null position with this node as parent.
+ PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
+ {
+ return attribute_iterator(0, _root);
+ }
+
+ // Returns the [begin(), end()) range over all child nodes.
+ PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
+ {
+ return xml_object_range<xml_node_iterator>(begin(), end());
+ }
+
+ // Returns a range over the child nodes named `name_`, starting at the first such child found by child().
+ PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
+ {
+ xml_named_node_iterator range_begin(child(name_)._root, _root, name_);
+ xml_named_node_iterator range_end(0, _root, name_);
+
+ return xml_object_range<xml_named_node_iterator>(range_begin, range_end);
+ }
+
+ // Returns the [attributes_begin(), attributes_end()) range over all attributes.
+ PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
+ {
+ return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
+ }
+
+ // Identity comparison: true when both handles refer to the same internal node.
+ PUGI__FN bool xml_node::operator==(const xml_node& r) const
+ {
+ return (_root == r._root);
+ }
+
+ // Identity comparison: true when the handles refer to different internal nodes.
+ PUGI__FN bool xml_node::operator!=(const xml_node& r) const
+ {
+ return (_root != r._root);
+ }
+
+ // Orders handles by the address of the internal node (not by document order or content).
+ PUGI__FN bool xml_node::operator<(const xml_node& r) const
+ {
+ return (_root < r._root);
+ }
+
+ // Orders handles by the address of the internal node (not by document order or content).
+ PUGI__FN bool xml_node::operator>(const xml_node& r) const
+ {
+ return (_root > r._root);
+ }
+
+ // Orders handles by the address of the internal node (not by document order or content).
+ PUGI__FN bool xml_node::operator<=(const xml_node& r) const
+ {
+ return (_root <= r._root);
+ }
+
+ // Orders handles by the address of the internal node (not by document order or content).
+ PUGI__FN bool xml_node::operator>=(const xml_node& r) const
+ {
+ return (_root >= r._root);
+ }
+
+ // Returns true when the handle does not refer to any node.
+ PUGI__FN bool xml_node::empty() const
+ {
+ return !_root;
+ }
+
+ // Returns the node name, or an empty string when the handle is empty or the name pointer is null.
+ PUGI__FN const char_t* xml_node::name() const
+ {
+ if (!_root || !_root->name) return PUGIXML_TEXT("");
+
+ return _root->name;
+ }
+
+ // Returns the node type read through PUGI__NODETYPE, or node_null for an empty handle.
+ PUGI__FN xml_node_type xml_node::type() const
+ {
+ if (!_root) return node_null;
+
+ return PUGI__NODETYPE(_root);
+ }
+
+ // Returns the node value, or an empty string when the handle is empty or the value pointer is null.
+ PUGI__FN const char_t* xml_node::value() const
+ {
+ if (!_root || !_root->value) return PUGIXML_TEXT("");
+
+ return _root->value;
+ }
+
+ // Returns the first child whose name equals `name_`, or an empty handle when there is none
+ // (or when this handle is empty). Children without a name are skipped.
+ PUGI__FN xml_node xml_node::child(const char_t* name_) const
+ {
+ if (!_root) return xml_node();
+
+ xml_node_struct* candidate = _root->first_child;
+
+ while (candidate)
+ {
+ if (candidate->name && impl::strequal(name_, candidate->name)) return xml_node(candidate);
+
+ candidate = candidate->next_sibling;
+ }
+
+ return xml_node();
+ }
+
+ // Returns the first attribute whose name equals `name_`, or an empty handle when there is none
+ // (or when this handle is empty). Attributes without a name are skipped.
+ PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
+ {
+ if (!_root) return xml_attribute();
+
+ xml_attribute_struct* candidate = _root->first_attribute;
+
+ while (candidate)
+ {
+ if (candidate->name && impl::strequal(name_, candidate->name)) return xml_attribute(candidate);
+
+ candidate = candidate->next_attribute;
+ }
+
+ return xml_attribute();
+ }
+
+ // Returns the nearest following sibling whose name equals `name_`, or an empty handle when there is none.
+ PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
+ {
+ if (!_root) return xml_node();
+
+ xml_node_struct* candidate = _root->next_sibling;
+
+ while (candidate)
+ {
+ if (candidate->name && impl::strequal(name_, candidate->name)) return xml_node(candidate);
+
+ candidate = candidate->next_sibling;
+ }
+
+ return xml_node();
+ }
+
+ // Returns a handle to the following sibling, or an empty handle when this handle is empty.
+ PUGI__FN xml_node xml_node::next_sibling() const
+ {
+ if (!_root) return xml_node();
+
+ return xml_node(_root->next_sibling);
+ }
+
+ // Returns the nearest preceding sibling whose name equals `name_`, or an empty handle when there is none.
+ PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
+ {
+ if (!_root) return xml_node();
+
+ // walk backwards through prev_sibling_c; the walk ends at a node whose next_sibling is null
+ xml_node_struct* candidate = _root->prev_sibling_c;
+
+ while (candidate->next_sibling)
+ {
+ if (candidate->name && impl::strequal(name_, candidate->name)) return xml_node(candidate);
+
+ candidate = candidate->prev_sibling_c;
+ }
+
+ return xml_node();
+ }
+
+ // Returns a handle to the preceding sibling, or an empty handle when there is none.
+ PUGI__FN xml_node xml_node::previous_sibling() const
+ {
+ if (!_root) return xml_node();
+
+ // a predecessor link whose own next_sibling is null is not reported as a previous sibling
+ xml_node_struct* before = _root->prev_sibling_c;
+
+ return before->next_sibling ? xml_node(before) : xml_node();
+ }
+
+ // Returns a handle to the parent node, or an empty handle when this handle is empty.
+ PUGI__FN xml_node xml_node::parent() const
+ {
+ if (!_root) return xml_node();
+
+ return xml_node(_root->parent);
+ }
+
+ // Returns a handle to the document obtained from impl::get_document, or an empty handle when this handle is empty.
+ PUGI__FN xml_node xml_node::root() const
+ {
+ if (!_root) return xml_node();
+
+ return xml_node(&impl::get_document(_root));
+ }
+
+ // Returns an xml_text helper wrapping this node's internal pointer.
+ PUGI__FN xml_text xml_node::text() const
+ {
+ return xml_text(_root);
+ }
+
+ // Returns the value of the first child that has a value and is a text node according to
+ // impl::is_text_node; returns an empty string when there is no such child or this handle is empty.
+ PUGI__FN const char_t* xml_node::child_value() const
+ {
+ if (!_root) return PUGIXML_TEXT("");
+
+ xml_node_struct* candidate = _root->first_child;
+
+ while (candidate)
+ {
+ if (candidate->value && impl::is_text_node(candidate)) return candidate->value;
+
+ candidate = candidate->next_sibling;
+ }
+
+ return PUGIXML_TEXT("");
+ }
+
+ // Returns child_value() of the first child named `name_` (an empty string when no such child exists).
+ PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
+ {
+ return child(name_).child_value();
+ }
+
+ // Returns a handle to the first attribute, or an empty handle when this handle is empty.
+ PUGI__FN xml_attribute xml_node::first_attribute() const
+ {
+ if (!_root) return xml_attribute();
+
+ return xml_attribute(_root->first_attribute);
+ }
+
+ // Returns a handle to the last attribute, reached through the first attribute's prev_attribute_c link;
+ // returns an empty handle when the node is empty or has no attributes.
+ PUGI__FN xml_attribute xml_node::last_attribute() const
+ {
+ if (!_root || !_root->first_attribute) return xml_attribute();
+
+ return xml_attribute(_root->first_attribute->prev_attribute_c);
+ }
+
+ // Returns a handle to the first child, or an empty handle when this handle is empty.
+ PUGI__FN xml_node xml_node::first_child() const
+ {
+ if (!_root) return xml_node();
+
+ return xml_node(_root->first_child);
+ }
+
+ // Returns a handle to the last child, reached through the first child's prev_sibling_c link;
+ // returns an empty handle when the node is empty or has no children.
+ PUGI__FN xml_node xml_node::last_child() const
+ {
+ if (!_root || !_root->first_child) return xml_node();
+
+ return xml_node(_root->first_child->prev_sibling_c);
+ }
+
+ // Replaces the node name with a copy of `rhs`.
+ // Only node_pi, node_declaration and node_element nodes are renamed; any other type
+ // (including an empty handle, whose type is node_null) yields false.
+ PUGI__FN bool xml_node::set_name(const char_t* rhs)
+ {
+ xml_node_type kind = type();
+
+ if (kind != node_pi && kind != node_declaration && kind != node_element) return false;
+
+ return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs);
+ }
+
+ // Replaces the node value with a copy of `rhs`.
+ // Only node_pi, node_cdata, node_pcdata, node_comment and node_doctype nodes accept a value;
+ // any other type (including an empty handle, whose type is node_null) yields false.
+ PUGI__FN bool xml_node::set_value(const char_t* rhs)
+ {
+ xml_node_type kind = type();
+
+ bool accepts_value = kind == node_pi || kind == node_cdata || kind == node_pcdata || kind == node_comment || kind == node_doctype;
+ if (!accepts_value) return false;
+
+ return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+
+ // Creates a new attribute named `name_` and links it in with impl::append_attribute.
+ // Returns an empty handle when this node type does not allow attributes or allocation fails.
+ PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
+ {
+ if (!impl::allow_insert_attribute(type())) return xml_attribute();
+
+ xml_attribute created(impl::allocate_attribute(impl::get_allocator(_root)));
+ if (created.empty()) return xml_attribute();
+
+ impl::append_attribute(created._attr, _root);
+
+ // the attribute is returned even if naming it fails
+ created.set_name(name_);
+
+ return created;
+ }
+
+ // Creates a new attribute named `name_` and links it in with impl::prepend_attribute.
+ // Returns an empty handle when this node type does not allow attributes or allocation fails.
+ PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
+ {
+ if (!impl::allow_insert_attribute(type())) return xml_attribute();
+
+ xml_attribute created(impl::allocate_attribute(impl::get_allocator(_root)));
+ if (created.empty()) return xml_attribute();
+
+ impl::prepend_attribute(created._attr, _root);
+
+ // the attribute is returned even if naming it fails
+ created.set_name(name_);
+
+ return created;
+ }
+
+ // Creates a new attribute named `name_` and links it in after `attr`.
+ // Returns an empty handle when this node type does not allow attributes, when `attr` is empty
+ // or does not belong to this node, or when allocation fails.
+ PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
+ {
+ if (!impl::allow_insert_attribute(type())) return xml_attribute();
+ if (attr.empty() || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
+
+ xml_attribute created(impl::allocate_attribute(impl::get_allocator(_root)));
+ if (created.empty()) return xml_attribute();
+
+ impl::insert_attribute_after(created._attr, attr._attr, _root);
+
+ // the attribute is returned even if naming it fails
+ created.set_name(name_);
+
+ return created;
+ }
+
+ // Creates a new attribute named `name_` and links it in before `attr`.
+ // Returns an empty handle when this node type does not allow attributes, when `attr` is empty
+ // or does not belong to this node, or when allocation fails.
+ PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
+ {
+ if (!impl::allow_insert_attribute(type())) return xml_attribute();
+ if (attr.empty() || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
+
+ xml_attribute created(impl::allocate_attribute(impl::get_allocator(_root)));
+ if (created.empty()) return xml_attribute();
+
+ impl::insert_attribute_before(created._attr, attr._attr, _root);
+
+ // the attribute is returned even if naming it fails
+ created.set_name(name_);
+
+ return created;
+ }
+
+ // Appends a new attribute carrying the name and value of `proto`.
+ // Returns an empty handle when `proto` is empty or the attribute could not be created.
+ PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
+ {
+ if (proto.empty()) return xml_attribute();
+
+ xml_attribute copy = append_attribute(proto.name());
+
+ // set_value on an empty handle is a no-op returning false, so a failed creation falls through safely
+ copy.set_value(proto.value());
+
+ return copy;
+ }
+
+ // Prepends a new attribute carrying the name and value of `proto`.
+ // Returns an empty handle when `proto` is empty or the attribute could not be created.
+ PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
+ {
+ if (proto.empty()) return xml_attribute();
+
+ xml_attribute copy = prepend_attribute(proto.name());
+
+ // set_value on an empty handle is a no-op returning false, so a failed creation falls through safely
+ copy.set_value(proto.value());
+
+ return copy;
+ }
+
+ // Inserts, after `attr`, a new attribute carrying the name and value of `proto`.
+ // Returns an empty handle when `proto` is empty or the attribute could not be created.
+ PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
+ {
+ if (proto.empty()) return xml_attribute();
+
+ xml_attribute copy = insert_attribute_after(proto.name(), attr);
+
+ // set_value on an empty handle is a no-op returning false, so a failed creation falls through safely
+ copy.set_value(proto.value());
+
+ return copy;
+ }
+
+ // Inserts, before `attr`, a new attribute carrying the name and value of `proto`.
+ // Returns an empty handle when `proto` is empty or the attribute could not be created.
+ PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
+ {
+ if (proto.empty()) return xml_attribute();
+
+ xml_attribute copy = insert_attribute_before(proto.name(), attr);
+
+ // set_value on an empty handle is a no-op returning false, so a failed creation falls through safely
+ copy.set_value(proto.value());
+
+ return copy;
+ }
+
+ // Creates a child of type `type_` and links it in with impl::append_node.
+ // Returns an empty handle when impl::allow_insert_child rejects the combination or allocation fails.
+ PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
+ {
+ if (!impl::allow_insert_child(type(), type_)) return xml_node();
+
+ xml_node created(impl::allocate_node(impl::get_allocator(_root), type_));
+ if (created.empty()) return xml_node();
+
+ impl::append_node(created._root, _root);
+
+ // declaration nodes are given the name "xml"
+ if (type_ == node_declaration) created.set_name(PUGIXML_TEXT("xml"));
+
+ return created;
+ }
+
+ // Creates a child of type `type_` and links it in with impl::prepend_node.
+ // Returns an empty handle when impl::allow_insert_child rejects the combination or allocation fails.
+ PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
+ {
+ if (!impl::allow_insert_child(type(), type_)) return xml_node();
+
+ xml_node created(impl::allocate_node(impl::get_allocator(_root), type_));
+ if (created.empty()) return xml_node();
+
+ impl::prepend_node(created._root, _root);
+
+ // declaration nodes are given the name "xml"
+ if (type_ == node_declaration) created.set_name(PUGIXML_TEXT("xml"));
+
+ return created;
+ }
+
+ // Creates a child of type `type_` and links it in before `node`.
+ // Returns an empty handle when the type combination is rejected, when `node` is empty or is not
+ // a child of this node, or when allocation fails.
+ PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
+ {
+ if (!impl::allow_insert_child(type(), type_)) return xml_node();
+
+ xml_node_struct* anchor = node._root;
+ if (!anchor || anchor->parent != _root) return xml_node();
+
+ xml_node created(impl::allocate_node(impl::get_allocator(_root), type_));
+ if (created.empty()) return xml_node();
+
+ impl::insert_node_before(created._root, anchor);
+
+ // declaration nodes are given the name "xml"
+ if (type_ == node_declaration) created.set_name(PUGIXML_TEXT("xml"));
+
+ return created;
+ }
+
+ // Creates a child of type `type_` and links it in after `node`.
+ // Returns an empty handle when the type combination is rejected, when `node` is empty or is not
+ // a child of this node, or when allocation fails.
+ PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
+ {
+ if (!impl::allow_insert_child(type(), type_)) return xml_node();
+
+ xml_node_struct* anchor = node._root;
+ if (!anchor || anchor->parent != _root) return xml_node();
+
+ xml_node created(impl::allocate_node(impl::get_allocator(_root), type_));
+ if (created.empty()) return xml_node();
+
+ impl::insert_node_after(created._root, anchor);
+
+ // declaration nodes are given the name "xml"
+ if (type_ == node_declaration) created.set_name(PUGIXML_TEXT("xml"));
+
+ return created;
+ }
+
+ // Appends a new element child and names it `name_`; returns whatever append_child(node_element)
+ // produced (an empty handle on failure, for which set_name returns false without effect).
+ PUGI__FN xml_node xml_node::append_child(const char_t* name_)
+ {
+ xml_node element = append_child(node_element);
+ element.set_name(name_);
+
+ return element;
+ }
+
+ // Prepends a new element child and names it `name_`; returns whatever prepend_child(node_element)
+ // produced (an empty handle on failure, for which set_name returns false without effect).
+ PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
+ {
+ xml_node element = prepend_child(node_element);
+ element.set_name(name_);
+
+ return element;
+ }
+
+ // Inserts a new element child after `node` and names it `name_`; returns whatever
+ // insert_child_after(node_element, node) produced (an empty handle on failure).
+ PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
+ {
+ xml_node element = insert_child_after(node_element, node);
+ element.set_name(name_);
+
+ return element;
+ }
+
+ // Inserts a new element child before `node` and names it `name_`; returns whatever
+ // insert_child_before(node_element, node) produced (an empty handle on failure).
+ PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
+ {
+ xml_node element = insert_child_before(node_element, node);
+ element.set_name(name_);
+
+ return element;
+ }
+
+ // Appends a new child of the same type as `proto` and fills it using impl::node_copy_tree.
+ // Returns an empty handle when the type combination is rejected or allocation fails.
+ PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
+ {
+ xml_node_type proto_type = proto.type();
+ if (!impl::allow_insert_child(type(), proto_type)) return xml_node();
+
+ xml_node copy(impl::allocate_node(impl::get_allocator(_root), proto_type));
+ if (copy.empty()) return xml_node();
+
+ // link the new node first, then copy the contents of proto into it
+ impl::append_node(copy._root, _root);
+ impl::node_copy_tree(copy._root, proto._root);
+
+ return copy;
+ }
+
+ // Prepends a new child of the same type as `proto` and fills it using impl::node_copy_tree.
+ // Returns an empty handle when the type combination is rejected or allocation fails.
+ PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
+ {
+ xml_node_type proto_type = proto.type();
+ if (!impl::allow_insert_child(type(), proto_type)) return xml_node();
+
+ xml_node copy(impl::allocate_node(impl::get_allocator(_root), proto_type));
+ if (copy.empty()) return xml_node();
+
+ // link the new node first, then copy the contents of proto into it
+ impl::prepend_node(copy._root, _root);
+ impl::node_copy_tree(copy._root, proto._root);
+
+ return copy;
+ }
+
+ // Inserts, after `node`, a new child of the same type as `proto` and fills it using impl::node_copy_tree.
+ // Returns an empty handle when the type combination is rejected, when `node` is empty or is not
+ // a child of this node, or when allocation fails.
+ PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
+ {
+ xml_node_type proto_type = proto.type();
+ if (!impl::allow_insert_child(type(), proto_type)) return xml_node();
+
+ xml_node_struct* anchor = node._root;
+ if (!anchor || anchor->parent != _root) return xml_node();
+
+ xml_node copy(impl::allocate_node(impl::get_allocator(_root), proto_type));
+ if (copy.empty()) return xml_node();
+
+ // link the new node first, then copy the contents of proto into it
+ impl::insert_node_after(copy._root, anchor);
+ impl::node_copy_tree(copy._root, proto._root);
+
+ return copy;
+ }
+
+ // Inserts, before `node`, a new child of the same type as `proto` and fills it using impl::node_copy_tree.
+ // Returns an empty handle when the type combination is rejected, when `node` is empty or is not
+ // a child of this node, or when allocation fails.
+ PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
+ {
+ xml_node_type proto_type = proto.type();
+ if (!impl::allow_insert_child(type(), proto_type)) return xml_node();
+
+ xml_node_struct* anchor = node._root;
+ if (!anchor || anchor->parent != _root) return xml_node();
+
+ xml_node copy(impl::allocate_node(impl::get_allocator(_root), proto_type));
+ if (copy.empty()) return xml_node();
+
+ // link the new node first, then copy the contents of proto into it
+ impl::insert_node_before(copy._root, anchor);
+ impl::node_copy_tree(copy._root, proto._root);
+
+ return copy;
+ }
+
+ // Detaches `moved` from its current position and re-links it with impl::append_node under this node.
+ // Returns `moved` on success, or an empty handle when impl::allow_move rejects the operation.
+ PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
+ {
+ if (!impl::allow_move(*this, moved)) return xml_node();
+
+ // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
+ impl::xml_document_struct& doc = impl::get_document(_root);
+ doc.header |= impl::xml_memory_page_contents_shared_mask;
+
+ xml_node_struct* subject = moved._root;
+ impl::remove_node(subject);
+ impl::append_node(subject, _root);
+
+ return moved;
+ }
+
+ // Detaches `moved` from its current position and re-links it with impl::prepend_node under this node.
+ // Returns `moved` on success, or an empty handle when impl::allow_move rejects the operation.
+ PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
+ {
+ if (!impl::allow_move(*this, moved)) return xml_node();
+
+ // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
+ impl::xml_document_struct& doc = impl::get_document(_root);
+ doc.header |= impl::xml_memory_page_contents_shared_mask;
+
+ xml_node_struct* subject = moved._root;
+ impl::remove_node(subject);
+ impl::prepend_node(subject, _root);
+
+ return moved;
+ }
+
+ // Detaches `moved` from its current position and re-links it after `node`.
+ // Returns `moved` on success. Returns an empty handle when impl::allow_move rejects the operation,
+ // when `node` is empty or is not a child of this node, or when `moved` and `node` are the same node.
+ PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
+ {
+ if (!impl::allow_move(*this, moved)) return xml_node();
+
+ xml_node_struct* anchor = node._root;
+ xml_node_struct* subject = moved._root;
+
+ if (!anchor || anchor->parent != _root || subject == anchor) return xml_node();
+
+ // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
+ impl::xml_document_struct& doc = impl::get_document(_root);
+ doc.header |= impl::xml_memory_page_contents_shared_mask;
+
+ impl::remove_node(subject);
+ impl::insert_node_after(subject, anchor);
+
+ return moved;
+ }
+
+ // Detaches `moved` from its current position and re-links it before `node`.
+ // Returns `moved` on success. Returns an empty handle when impl::allow_move rejects the operation,
+ // when `node` is empty or is not a child of this node, or when `moved` and `node` are the same node.
+ PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
+ {
+ if (!impl::allow_move(*this, moved)) return xml_node();
+
+ xml_node_struct* anchor = node._root;
+ xml_node_struct* subject = moved._root;
+
+ if (!anchor || anchor->parent != _root || subject == anchor) return xml_node();
+
+ // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
+ impl::xml_document_struct& doc = impl::get_document(_root);
+ doc.header |= impl::xml_memory_page_contents_shared_mask;
+
+ impl::remove_node(subject);
+ impl::insert_node_before(subject, anchor);
+
+ return moved;
+ }
+
+ // Removes the first attribute named `name_`; returns false when no such attribute exists.
+ PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
+ {
+ return remove_attribute(attribute(name_));
+ }
+
+ // Unlinks attribute `a` from this node and destroys it through the node's allocator.
+ // Returns false (and changes nothing) when either handle is empty or `a` does not belong to this node.
+ PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
+ {
+ xml_attribute_struct* target = a._attr;
+
+ if (!_root || !target || !impl::is_attribute_of(target, _root)) return false;
+
+ impl::remove_attribute(target, _root);
+ impl::destroy_attribute(target, impl::get_allocator(_root));
+
+ return true;
+ }
+
+ // Removes the first child named `name_`; returns false when no such child exists.
+ PUGI__FN bool xml_node::remove_child(const char_t* name_)
+ {
+ return remove_child(child(name_));
+ }
+
+ // Unlinks child `n` from this node and destroys it through the node's allocator.
+ // Returns false (and changes nothing) when either handle is empty or `n` is not a direct child of this node.
+ PUGI__FN bool xml_node::remove_child(const xml_node& n)
+ {
+ xml_node_struct* target = n._root;
+
+ bool is_own_child = _root && target && target->parent == _root;
+ if (!is_own_child) return false;
+
+ impl::remove_node(target);
+ impl::destroy_node(target, impl::get_allocator(_root));
+
+ return true;
+ }
+
+ // Parses `contents` as a document fragment and adds the parsed nodes under this node.
+ // The input buffer is passed to load_buffer_impl as non-mutable and non-owned; the buffer that
+ // load_buffer_impl reports back is recorded in the document's extra_buffers list.
+ // Returns status_append_invalid_root when this node cannot hold element children,
+ // status_out_of_memory when the bookkeeping record cannot be allocated, and otherwise the parse result.
+ PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
+ {
+ // append_buffer is only valid for elements/documents
+ if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
+
+ // get document node
+ impl::xml_document_struct* doc = &impl::get_document(_root);
+
+ // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
+ doc->header |= impl::xml_memory_page_contents_shared_mask;
+
+ // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
+ impl::xml_memory_page* page = 0;
+ impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page));
+ (void)page;
+
+ if (!extra) return impl::make_parse_result(status_out_of_memory);
+
+ // save name; name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
+ char_t* rootname = _root->name;
+ _root->name = 0;
+
+ // parse
+ char_t* buffer = 0;
+ xml_parse_result res = impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &buffer);
+
+ // restore name
+ _root->name = rootname;
+
+ // add extra buffer to the list (buffer stays null if load_buffer_impl did not report one)
+ extra->buffer = buffer;
+ extra->next = doc->extra_buffers;
+ doc->extra_buffers = extra;
+
+ return res;
+ }
+
+ // Returns the first child named `name_` that has an attribute `attr_name` whose value equals
+ // `attr_value` (a null attribute value is compared as an empty string).
+ // Returns an empty handle when there is no match or this handle is empty.
+ PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
+ {
+ if (!_root) return xml_node();
+
+ for (xml_node_struct* candidate = _root->first_child; candidate; candidate = candidate->next_sibling)
+ {
+ // only children with a matching name are inspected
+ if (!candidate->name || !impl::strequal(name_, candidate->name)) continue;
+
+ for (xml_attribute_struct* attr = candidate->first_attribute; attr; attr = attr->next_attribute)
+ {
+ if (!attr->name || !impl::strequal(attr_name, attr->name)) continue;
+
+ const char_t* current_value = attr->value ? attr->value : PUGIXML_TEXT("");
+ if (impl::strequal(attr_value, current_value)) return xml_node(candidate);
+ }
+ }
+
+ return xml_node();
+ }
+
+ // Returns the first child (of any name) that has an attribute `attr_name` whose value equals
+ // `attr_value` (a null attribute value is compared as an empty string).
+ // Returns an empty handle when there is no match or this handle is empty.
+ PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
+ {
+ if (!_root) return xml_node();
+
+ for (xml_node_struct* candidate = _root->first_child; candidate; candidate = candidate->next_sibling)
+ {
+ for (xml_attribute_struct* attr = candidate->first_attribute; attr; attr = attr->next_attribute)
+ {
+ if (!attr->name || !impl::strequal(attr_name, attr->name)) continue;
+
+ const char_t* current_value = attr->value ? attr->value : PUGIXML_TEXT("");
+ if (impl::strequal(attr_value, current_value)) return xml_node(candidate);
+ }
+ }
+
+ return xml_node();
+ }
+
+#ifndef PUGIXML_NO_STL
+ // Builds the path of this node: the names of all ancestors (outermost first) and of this node,
+ // each pair separated by `delimiter`.
+ PUGI__FN string_t xml_node::path(char_t delimiter) const
+ {
+ string_t joined = name();
+
+ // climb towards the top, putting each ancestor's name and a delimiter in front of what we have so far
+ for (xml_node ancestor = parent(); ancestor; ancestor = ancestor.parent())
+ {
+ string_t extended = ancestor.name();
+ extended += delimiter;
+ extended += joined;
+
+ joined.swap(extended);
+ }
+
+ return joined;
+ }
+#endif
+
+ // Resolves `path_` (segments separated by `delimiter`) and returns the first matching node.
+ // - A path that starts with `delimiter` is resolved from the document root, otherwise from this node.
+ // - A "." segment stays on the current node; a ".." segment moves to the parent.
+ // - Any other segment is matched against child names; every matching child is tried in order and
+ //   the first one for which the rest of the path resolves is returned.
+ // Returns this node itself when the handle is empty or the path is null or empty, and an empty
+ // handle when no child matches a segment.
+ PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
+ {
+ xml_node found = *this; // Current search context.
+
+ if (!_root || !path_ || !path_[0]) return found;
+
+ if (path_[0] == delimiter)
+ {
+ // Absolute path; e.g. '/foo/bar'
+ found = found.root();
+ ++path_;
+ }
+
+ const char_t* path_segment = path_;
+
+ // skip any run of delimiters in front of the segment
+ while (*path_segment == delimiter) ++path_segment;
+
+ const char_t* path_segment_end = path_segment;
+
+ // advance to the end of the segment (next delimiter or end of string)
+ while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
+
+ // nothing left to resolve: the current context is the answer
+ if (path_segment == path_segment_end) return found;
+
+ const char_t* next_segment = path_segment_end;
+
+ while (*next_segment == delimiter) ++next_segment;
+
+ if (*path_segment == '.' && path_segment + 1 == path_segment_end)
+ return found.first_element_by_path(next_segment, delimiter);
+ else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
+ return found.parent().first_element_by_path(next_segment, delimiter);
+ else
+ {
+ for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
+ {
+ if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
+ {
+ // resolve the remainder of the path below this child; keep looking if that fails
+ xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
+
+ if (subsearch) return subsearch;
+ }
+ }
+
+ return xml_node();
+ }
+ }
+
+ // Walks the subtree below this node, visiting a node before its children and its children before
+ // its following siblings.
+ // Calls walker.begin() once with this node, walker.for_each() for every descendant, and finally
+ // walker.end() with this node. walker._depth is -1 for begin/end and is 0 for direct children,
+ // increasing by one per nesting level, while for_each runs.
+ // Returns false as soon as begin() or for_each() returns false (end() is then not called);
+ // otherwise returns the result of end().
+ PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
+ {
+ walker._depth = -1;
+
+ xml_node arg_begin = *this;
+ if (!walker.begin(arg_begin)) return false;
+
+ xml_node cur = first_child();
+
+ if (cur)
+ {
+ ++walker._depth;
+
+ do
+ {
+ xml_node arg_for_each = cur;
+ if (!walker.for_each(arg_for_each))
+ return false;
+
+ if (cur.first_child())
+ {
+ // descend into the first child
+ ++walker._depth;
+ cur = cur.first_child();
+ }
+ else if (cur.next_sibling())
+ cur = cur.next_sibling();
+ else
+ {
+ // climb until a node with a following sibling is found or the traversal root is reached
+ // Borland C++ workaround
+ while (!cur.next_sibling() && cur != *this && !cur.parent().empty())
+ {
+ --walker._depth;
+ cur = cur.parent();
+ }
+
+ if (cur != *this)
+ cur = cur.next_sibling();
+ }
+ }
+ while (cur && cur != *this);
+ }
+
+ assert(walker._depth == -1);
+
+ xml_node arg_end = *this;
+ return walker.end(arg_end);
+ }
+
+ // Derives a hash from the internal pointer: its address divided by the size of the node structure.
+ PUGI__FN size_t xml_node::hash_value() const
+ {
+ uintptr_t address = reinterpret_cast<uintptr_t>(_root);
+
+ return static_cast<size_t>(address / sizeof(xml_node_struct));
+ }
+
+ // Exposes the raw internal node pointer (null for an empty handle).
+ PUGI__FN xml_node_struct* xml_node::internal_object() const
+ {
+ return _root;
+ }
+
+ // Serializes this node to `writer` through an impl::xml_buffered_writer created for `encoding`,
+ // forwarding `indent`, `flags` and `depth` to impl::node_output. Does nothing for an empty handle.
+ PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
+ {
+ if (!_root) return;
+
+ impl::xml_buffered_writer buffered_writer(writer, encoding);
+
+ impl::node_output(buffered_writer, _root, indent, flags, depth);
+ }
+
+#ifndef PUGIXML_NO_STL
+ // Serializes this node to a narrow output stream by wrapping it in an xml_writer_stream.
+ PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
+ {
+ xml_writer_stream writer(stream);
+
+ print(writer, indent, flags, encoding, depth);
+ }
+
+ // Serializes this node to a wide output stream by wrapping it in an xml_writer_stream;
+ // the output encoding is always encoding_wchar.
+ PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
+ {
+ xml_writer_stream writer(stream);
+
+ print(writer, indent, flags, encoding_wchar, depth);
+ }
+#endif
+
+ // Returns the offset of this node's name (element, declaration, pi) or value (pcdata, cdata,
+ // comment, doctype) from the start of the document's parse buffer; 0 for the document node.
+ // Returns -1 when the handle is empty, when the document has no buffer or has extra buffers,
+ // when the relevant string is null or flagged as allocated/shared, or for any other node type.
+ PUGI__FN ptrdiff_t xml_node::offset_debug() const
+ {
+ if (!_root) return -1;
+
+ impl::xml_document_struct& doc = impl::get_document(_root);
+
+ // we can determine the offset reliably only if there is exactly one parse buffer
+ if (!doc.buffer || doc.extra_buffers) return -1;
+
+ xml_node_type kind = type();
+
+ if (kind == node_document) return 0;
+
+ if (kind == node_element || kind == node_declaration || kind == node_pi)
+ {
+ bool name_in_buffer = _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0;
+ if (!name_in_buffer) return -1;
+
+ return _root->name - doc.buffer;
+ }
+
+ if (kind == node_pcdata || kind == node_cdata || kind == node_comment || kind == node_doctype)
+ {
+ bool value_in_buffer = _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0;
+ if (!value_in_buffer) return -1;
+
+ return _root->value - doc.buffer;
+ }
+
+ return -1;
+ }
+
+#ifdef __BORLANDC__
+ // Borland-only overload (guarded by __BORLANDC__): combines the truth value of a node
+ // handle with a plain bool using logical AND.
+ PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
+ {
+ return (bool)lhs && rhs;
+ }
+
+ // Borland-only overload (guarded by __BORLANDC__): combines the truth value of a node
+ // handle with a plain bool using logical OR.
+ PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
+ {
+ return (bool)lhs || rhs;
+ }
+#endif
+
+ // Constructs a text handle that refers to the given node structure.
+ PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
+ {
+ }
+
+ // Locates the node that holds the text: the wrapped node itself when
+ // impl::is_text_node accepts it, otherwise its first child that does.
+ // Returns null for a null handle or when no such child exists.
+ PUGI__FN xml_node_struct* xml_text::_data() const
+ {
+     if (!_root) return 0;
+     if (impl::is_text_node(_root)) return _root;
+
+     xml_node_struct* child = _root->first_child;
+
+     while (child && !impl::is_text_node(child))
+         child = child->next_sibling;
+
+     return child;
+ }
+
+ // Returns the node found by _data(); when there is none, appends a node_pcdata child
+ // to the wrapped node and returns that child's structure instead.
+ PUGI__FN xml_node_struct* xml_text::_data_new()
+ {
+     if (xml_node_struct* existing = _data()) return existing;
+
+     xml_node owner(_root);
+
+     return owner.append_child(node_pcdata).internal_object();
+ }
+
+ // Constructs an empty text handle (no underlying node).
+ PUGI__FN xml_text::xml_text(): _root(0)
+ {
+ }
+
+ // Empty function whose address serves as the non-null result of
+ // xml_text::operator unspecified_bool_type().
+ PUGI__FN static void unspecified_bool_xml_text(xml_text***)
+ {
+ }
+
+ // Boolean-like conversion: yields a non-null value exactly when _data() finds a node.
+ PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
+ {
+     if (!_data()) return 0;
+
+     return unspecified_bool_xml_text;
+ }
+
+ // True when _data() finds no node for this handle.
+ PUGI__FN bool xml_text::operator!() const
+ {
+     return _data() == 0;
+ }
+
+ // Reports whether the handle has no associated node according to _data().
+ PUGI__FN bool xml_text::empty() const
+ {
+     return !_data();
+ }
+
+ // Returns the stored text value, or an empty string when there is no node or the
+ // node's value is null.
+ PUGI__FN const char_t* xml_text::get() const
+ {
+     xml_node_struct* d = _data();
+
+     if (d && d->value) return d->value;
+
+     return PUGIXML_TEXT("");
+ }
+
+ // Returns the stored text value, or `def` when there is no node or the node's value
+ // is null.
+ PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
+ {
+     xml_node_struct* d = _data();
+
+     if (d && d->value) return d->value;
+
+     return def;
+ }
+
+ // Converts the text value with impl::get_value_int; a null value is passed when
+ // there is no node, and `def` is forwarded as the fallback.
+ PUGI__FN int xml_text::as_int(int def) const
+ {
+     char_t* value = 0;
+
+     if (xml_node_struct* d = _data()) value = d->value;
+
+     return impl::get_value_int(value, def);
+ }
+
+ // Converts the text value with impl::get_value_uint; a null value is passed when
+ // there is no node, and `def` is forwarded as the fallback.
+ PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
+ {
+     char_t* value = 0;
+
+     if (xml_node_struct* d = _data()) value = d->value;
+
+     return impl::get_value_uint(value, def);
+ }
+
+ // Converts the text value with impl::get_value_double; a null value is passed when
+ // there is no node, and `def` is forwarded as the fallback.
+ PUGI__FN double xml_text::as_double(double def) const
+ {
+     char_t* value = 0;
+
+     if (xml_node_struct* d = _data()) value = d->value;
+
+     return impl::get_value_double(value, def);
+ }
+
+ // Converts the text value with impl::get_value_float; a null value is passed when
+ // there is no node, and `def` is forwarded as the fallback.
+ PUGI__FN float xml_text::as_float(float def) const
+ {
+     char_t* value = 0;
+
+     if (xml_node_struct* d = _data()) value = d->value;
+
+     return impl::get_value_float(value, def);
+ }
+
+ // Converts the text value with impl::get_value_bool; a null value is passed when
+ // there is no node, and `def` is forwarded as the fallback.
+ PUGI__FN bool xml_text::as_bool(bool def) const
+ {
+     char_t* value = 0;
+
+     if (xml_node_struct* d = _data()) value = d->value;
+
+     return impl::get_value_bool(value, def);
+ }
+
+#ifdef PUGIXML_HAS_LONG_LONG
+ // Converts the text value with impl::get_value_llong; a null value is passed when
+ // there is no node, and `def` is forwarded as the fallback.
+ PUGI__FN long long xml_text::as_llong(long long def) const
+ {
+     char_t* value = 0;
+
+     if (xml_node_struct* d = _data()) value = d->value;
+
+     return impl::get_value_llong(value, def);
+ }
+
+ // Converts the text value with impl::get_value_ullong; a null value is passed when
+ // there is no node, and `def` is forwarded as the fallback.
+ PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
+ {
+     char_t* value = 0;
+
+     if (xml_node_struct* d = _data()) value = d->value;
+
+     return impl::get_value_ullong(value, def);
+ }
+#endif
+
+ // Copies the string `rhs` into the value of the node returned by _data_new()
+ // (which appends a PCDATA child if needed). Returns false when no node is available,
+ // otherwise the result of impl::strcpy_insitu.
+ PUGI__FN bool xml_text::set(const char_t* rhs)
+ {
+     xml_node_struct* dn = _data_new();
+
+     if (!dn) return false;
+
+     return impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+
+ // Stores the int `rhs` as the value of the node returned by _data_new().
+ // Returns false when no node is available, otherwise the result of
+ // impl::set_value_convert.
+ PUGI__FN bool xml_text::set(int rhs)
+ {
+     xml_node_struct* dn = _data_new();
+
+     if (!dn) return false;
+
+     return impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+
+ // Stores the unsigned int `rhs` as the value of the node returned by _data_new().
+ // Returns false when no node is available, otherwise the result of
+ // impl::set_value_convert.
+ PUGI__FN bool xml_text::set(unsigned int rhs)
+ {
+     xml_node_struct* dn = _data_new();
+
+     if (!dn) return false;
+
+     return impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+
+ // Stores the float `rhs` as the value of the node returned by _data_new().
+ // Returns false when no node is available, otherwise the result of
+ // impl::set_value_convert.
+ PUGI__FN bool xml_text::set(float rhs)
+ {
+     xml_node_struct* dn = _data_new();
+
+     if (!dn) return false;
+
+     return impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+
+ // Stores the double `rhs` as the value of the node returned by _data_new().
+ // Returns false when no node is available, otherwise the result of
+ // impl::set_value_convert.
+ PUGI__FN bool xml_text::set(double rhs)
+ {
+     xml_node_struct* dn = _data_new();
+
+     if (!dn) return false;
+
+     return impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+
+ // Stores the bool `rhs` as the value of the node returned by _data_new().
+ // Returns false when no node is available, otherwise the result of
+ // impl::set_value_convert.
+ PUGI__FN bool xml_text::set(bool rhs)
+ {
+     xml_node_struct* dn = _data_new();
+
+     if (!dn) return false;
+
+     return impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+
+#ifdef PUGIXML_HAS_LONG_LONG
+ // Stores the long long `rhs` as the value of the node returned by _data_new().
+ // Returns false when no node is available, otherwise the result of
+ // impl::set_value_convert.
+ PUGI__FN bool xml_text::set(long long rhs)
+ {
+     xml_node_struct* dn = _data_new();
+
+     if (!dn) return false;
+
+     return impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+
+ // Stores the unsigned long long `rhs` as the value of the node returned by
+ // _data_new(). Returns false when no node is available, otherwise the result of
+ // impl::set_value_convert.
+ PUGI__FN bool xml_text::set(unsigned long long rhs)
+ {
+     xml_node_struct* dn = _data_new();
+
+     if (!dn) return false;
+
+     return impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs);
+ }
+#endif
+
+ // Assignment overloads: each forwards to the matching set() overload, discards its
+ // bool result, and returns *this.
+
+ // Assigns a string value.
+ PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
+ {
+ set(rhs);
+ return *this;
+ }
+
+ // Assigns an int value.
+ PUGI__FN xml_text& xml_text::operator=(int rhs)
+ {
+ set(rhs);
+ return *this;
+ }
+
+ // Assigns an unsigned int value.
+ PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
+ {
+ set(rhs);
+ return *this;
+ }
+
+ // Assigns a double value.
+ PUGI__FN xml_text& xml_text::operator=(double rhs)
+ {
+ set(rhs);
+ return *this;
+ }
+
+ // Assigns a float value.
+ PUGI__FN xml_text& xml_text::operator=(float rhs)
+ {
+ set(rhs);
+ return *this;
+ }
+
+ // Assigns a bool value.
+ PUGI__FN xml_text& xml_text::operator=(bool rhs)
+ {
+ set(rhs);
+ return *this;
+ }
+
+#ifdef PUGIXML_HAS_LONG_LONG
+ // Assigns a long long value via set(); the bool result of set() is discarded.
+ PUGI__FN xml_text& xml_text::operator=(long long rhs)
+ {
+ set(rhs);
+ return *this;
+ }
+
+ // Assigns an unsigned long long value via set(); the bool result of set() is discarded.
+ PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
+ {
+ set(rhs);
+ return *this;
+ }
+#endif
+
+ // Returns a node handle for the node located by _data() (a null handle if none).
+ PUGI__FN xml_node xml_text::data() const
+ {
+     xml_node_struct* d = _data();
+
+     return xml_node(d);
+ }
+
+#ifdef __BORLANDC__
+ // Borland-only overload (guarded by __BORLANDC__): combines the truth value of a text
+ // handle with a plain bool using logical AND.
+ PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
+ {
+ return (bool)lhs && rhs;
+ }
+
+ // Borland-only overload (guarded by __BORLANDC__): combines the truth value of a text
+ // handle with a plain bool using logical OR.
+ PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
+ {
+ return (bool)lhs || rhs;
+ }
+#endif
+
+ // Default constructor: both members are default-constructed.
+ PUGI__FN xml_node_iterator::xml_node_iterator()
+ {
+ }
+
+ // Constructs an iterator positioned at `node`; the parent is taken from node.parent().
+ PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
+ {
+ }
+
+ // Constructs an iterator from raw node structures for the position and its parent.
+ PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
+ {
+ }
+
+ // Iterators are equal when they refer to the same node and the same parent.
+ PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
+ {
+     const bool same_node = (_wrap._root == rhs._wrap._root);
+     const bool same_parent = (_parent._root == rhs._parent._root);
+
+     return same_node && same_parent;
+ }
+
+ // Iterators differ when either the node or the parent differs.
+ PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
+ {
+     const bool same_node = (_wrap._root == rhs._wrap._root);
+     const bool same_parent = (_parent._root == rhs._parent._root);
+
+     return !same_node || !same_parent;
+ }
+
+ // Dereference: returns the node handle at the current position.
+ // Asserts that the iterator does not sit on a null node.
+ PUGI__FN xml_node& xml_node_iterator::operator*() const
+ {
+ assert(_wrap._root);
+ return _wrap;
+ }
+
+ // Member access: returns a pointer to the node handle at the current position.
+ // Asserts that the iterator does not sit on a null node.
+ PUGI__FN xml_node* xml_node_iterator::operator->() const
+ {
+ assert(_wrap._root);
+ return const_cast<xml_node*>(&_wrap); // BCC32 workaround
+ }
+
+ // Pre-increment: moves to the next sibling (becomes null after the last one).
+ // Asserts that the iterator currently refers to a node.
+ PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
+ {
+     assert(_wrap._root);
+
+     xml_node_struct* following = _wrap._root->next_sibling;
+     _wrap._root = following;
+
+     return *this;
+ }
+
+ // Post-increment: advances the iterator and returns a copy of its previous state.
+ PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
+ {
+     xml_node_iterator previous = *this;
+
+     operator++();
+
+     return previous;
+ }
+
+ // Pre-decrement: moves to the previous sibling, or to the parent's last child when
+ // the iterator currently holds a null node.
+ PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
+ {
+     if (_wrap._root)
+         _wrap = _wrap.previous_sibling();
+     else
+         _wrap = _parent.last_child();
+
+     return *this;
+ }
+
+ // Post-decrement: steps the iterator back and returns a copy of its previous state.
+ PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
+ {
+     xml_node_iterator previous = *this;
+
+     operator--();
+
+     return previous;
+ }
+
+ // Default constructor: both members are default-constructed.
+ PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
+ {
+ }
+
+ // Constructs an iterator positioned at `attr` within the node `parent`.
+ PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
+ {
+ }
+
+ // Constructs an iterator from the raw attribute structure and its parent node structure.
+ PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
+ {
+ }
+
+ // Iterators are equal when they refer to the same attribute and the same parent node.
+ PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
+ {
+     const bool same_attribute = (_wrap._attr == rhs._wrap._attr);
+     const bool same_parent = (_parent._root == rhs._parent._root);
+
+     return same_attribute && same_parent;
+ }
+
+ // Iterators differ when either the attribute or the parent node differs.
+ PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
+ {
+     const bool same_attribute = (_wrap._attr == rhs._wrap._attr);
+     const bool same_parent = (_parent._root == rhs._parent._root);
+
+     return !same_attribute || !same_parent;
+ }
+
+ // Dereference: returns the attribute handle at the current position.
+ // Asserts that the iterator does not sit on a null attribute.
+ PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
+ {
+ assert(_wrap._attr);
+ return _wrap;
+ }
+
+ // Member access: returns a pointer to the attribute handle at the current position.
+ // Asserts that the iterator does not sit on a null attribute.
+ PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
+ {
+ assert(_wrap._attr);
+ return const_cast<xml_attribute*>(&_wrap); // BCC32 workaround
+ }
+
+ // Pre-increment: moves to the next attribute (becomes null after the last one).
+ // Asserts that the iterator currently refers to an attribute.
+ PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
+ {
+     assert(_wrap._attr);
+
+     xml_attribute_struct* following = _wrap._attr->next_attribute;
+     _wrap._attr = following;
+
+     return *this;
+ }
+
+ // Post-increment: advances the iterator and returns a copy of its previous state.
+ PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
+ {
+     xml_attribute_iterator previous = *this;
+
+     operator++();
+
+     return previous;
+ }
+
+ // Pre-decrement: moves to the previous attribute, or to the parent's last attribute
+ // when the iterator currently holds a null attribute.
+ PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
+ {
+     if (_wrap._attr)
+         _wrap = _wrap.previous_attribute();
+     else
+         _wrap = _parent.last_attribute();
+
+     return *this;
+ }
+
+ // Post-decrement: steps the iterator back and returns a copy of its previous state.
+ PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
+ {
+     xml_attribute_iterator previous = *this;
+
+     operator--();
+
+     return previous;
+ }
+
+ // Default constructor: node members are default-constructed and the name filter is null.
+ PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
+ {
+ }
+
+ // Constructs an iterator positioned at `node` that steps between siblings called `name`;
+ // the parent is taken from node.parent(). The name pointer is stored, not copied.
+ PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
+ {
+ }
+
+ // Constructs an iterator from raw node structures for the position and its parent,
+ // plus the name used for stepping. The name pointer is stored, not copied.
+ PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
+ {
+ }
+
+ // Iterators are equal when they refer to the same node and the same parent;
+ // the name filter is not part of the comparison.
+ PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
+ {
+     const bool same_node = (_wrap._root == rhs._wrap._root);
+     const bool same_parent = (_parent._root == rhs._parent._root);
+
+     return same_node && same_parent;
+ }
+
+ // Iterators differ when either the node or the parent differs;
+ // the name filter is not part of the comparison.
+ PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
+ {
+     const bool same_node = (_wrap._root == rhs._wrap._root);
+     const bool same_parent = (_parent._root == rhs._parent._root);
+
+     return !same_node || !same_parent;
+ }
+
+ // Dereference: returns the node handle at the current position.
+ // Asserts that the iterator does not sit on a null node.
+ PUGI__FN xml_node& xml_named_node_iterator::operator*() const
+ {
+ assert(_wrap._root);
+ return _wrap;
+ }
+
+ // Member access: returns a pointer to the node handle at the current position.
+ // Asserts that the iterator does not sit on a null node.
+ PUGI__FN xml_node* xml_named_node_iterator::operator->() const
+ {
+ assert(_wrap._root);
+ return const_cast<xml_node*>(&_wrap); // BCC32 workaround
+ }
+
+ // Pre-increment: moves to the result of next_sibling(_name) on the current node.
+ // Asserts that the iterator currently refers to a node.
+ PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
+ {
+     assert(_wrap._root);
+
+     xml_node following = _wrap.next_sibling(_name);
+     _wrap = following;
+
+     return *this;
+ }
+
+ // Post-increment: advances the iterator and returns a copy of its previous state.
+ PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
+ {
+     xml_named_node_iterator previous = *this;
+
+     operator++();
+
+     return previous;
+ }
+
+ // Pre-decrement: moves to previous_sibling(_name) of the current node. When the
+ // iterator holds a null node, the search starts at the parent's last child, which is
+ // accepted as-is if its name already equals _name.
+ PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
+ {
+     if (!_wrap._root)
+     {
+         _wrap = _parent.last_child();
+
+         // the last child itself may already carry the requested name
+         if (impl::strequal(_wrap.name(), _name)) return *this;
+     }
+
+     _wrap = _wrap.previous_sibling(_name);
+
+     return *this;
+ }
+
+ // Post-decrement: steps the iterator back and returns a copy of its previous state.
+ PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
+ {
+     xml_named_node_iterator previous = *this;
+
+     operator--();
+
+     return previous;
+ }
+
+ // A default-constructed result reports status_internal_error at offset 0 with
+ // encoding_auto, i.e. it does not compare as successful until filled in.
+ PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
+ {
+ }
+
+ // True exactly when the status is status_ok.
+ PUGI__FN xml_parse_result::operator bool() const
+ {
+ return status == status_ok;
+ }
+
+ // Maps the stored status code to a fixed English message (a string literal).
+ // Status values without a dedicated case yield "Unknown error".
+ PUGI__FN const char* xml_parse_result::description() const
+ {
+ switch (status)
+ {
+ case status_ok: return "No error";
+
+ // I/O, memory and internal failures
+ case status_file_not_found: return "File was not found";
+ case status_io_error: return "Error reading from file/stream";
+ case status_out_of_memory: return "Could not allocate memory";
+ case status_internal_error: return "Internal error occurred";
+
+ case status_unrecognized_tag: return "Could not determine tag type";
+
+ // failures attributed to a specific XML construct
+ case status_bad_pi: return "Error parsing document declaration/processing instruction";
+ case status_bad_comment: return "Error parsing comment";
+ case status_bad_cdata: return "Error parsing CDATA section";
+ case status_bad_doctype: return "Error parsing document type declaration";
+ case status_bad_pcdata: return "Error parsing PCDATA section";
+ case status_bad_start_element: return "Error parsing start element tag";
+ case status_bad_attribute: return "Error parsing element attribute";
+ case status_bad_end_element: return "Error parsing end element tag";
+ case status_end_element_mismatch: return "Start-end tags mismatch";
+
+ case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
+
+ case status_no_document_element: return "No document element found";
+
+ default: return "Unknown error";
+ }
+ }
+
+ // Constructs an empty document: no owned buffer, root set up by create().
+ PUGI__FN xml_document::xml_document(): _buffer(0)
+ {
+ create();
+ }
+
+ // Releases everything owned by the document via destroy().
+ PUGI__FN xml_document::~xml_document()
+ {
+ destroy();
+ }
+
+ // Discards the current contents and re-creates an empty document root.
+ PUGI__FN void xml_document::reset()
+ {
+ destroy();
+ create();
+ }
+
+ // Clears this document, then appends a copy of every top-level child of `proto`
+ // in order.
+ PUGI__FN void xml_document::reset(const xml_document& proto)
+ {
+     reset();
+
+     xml_node cur = proto.first_child();
+
+     while (cur)
+     {
+         append_copy(cur);
+         cur = cur.next_sibling();
+     }
+ }
+
+ // Builds the initial document state inside the object's own _memory array: an aligned
+ // memory page header immediately followed by the document structure, which becomes
+ // _root. Must only be called while _root is null.
+ PUGI__FN void xml_document::create()
+ {
+ assert(!_root);
+
+ // initialize sentinel page
+ // (compile-time check that _memory can hold the page header, the document structure
+ // and the worst-case alignment padding)
+ PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment - sizeof(void*) <= sizeof(_memory));
+
+ // align upwards to page boundary
+ void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1));
+
+ // prepare page structure
+ impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory);
+ assert(page);
+
+ // mark the page as fully occupied
+ page->busy_size = impl::xml_memory_page_size;
+
+ // allocate new root
+ // (placement-new directly behind the page header; the root's prev_sibling_c points at itself)
+ _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)) impl::xml_document_struct(page);
+ _root->prev_sibling_c = _root;
+
+ // setup sentinel page
+ page->allocator = static_cast<impl::xml_document_struct*>(_root);
+
+ // verify the document allocation
+ assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
+ }
+
+ // Frees everything the document owns: the main buffer (_buffer), the buffers in the
+ // extra-buffer list, and every memory page after the first one. The first page lives
+ // inside _memory and is left alone. Ends with _root set to null.
+ PUGI__FN void xml_document::destroy()
+ {
+ assert(_root);
+
+ // destroy static storage
+ if (_buffer)
+ {
+ impl::xml_memory::deallocate(_buffer);
+ _buffer = 0;
+ }
+
+ // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
+ for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
+ {
+ if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
+ }
+
+ // destroy dynamic storage, leave sentinel page (it's in static memory)
+ // (the page pointer is recovered from the root's header bits; it must be the first
+ // page in the chain and must lie within _memory)
+ impl::xml_memory_page* root_page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
+ assert(root_page && !root_page->prev);
+ assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
+
+ for (impl::xml_memory_page* page = root_page->next; page; )
+ {
+ // read the link before the page is released
+ impl::xml_memory_page* next = page->next;
+
+ impl::xml_allocator::deallocate_page(page);
+
+ page = next;
+ }
+
+ _root = 0;
+ }
+
+#ifndef PUGIXML_NO_STL
+ // Replaces the document contents with data read from a narrow-character stream;
+ // the document is reset before loading and the work is done by impl::load_stream_impl.
+ PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
+ {
+ reset();
+
+ return impl::load_stream_impl(*this, stream, options, encoding);
+ }
+
+ // Replaces the document contents with data read from a wide-character stream;
+ // the encoding is always encoding_wchar.
+ PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
+ {
+ reset();
+
+ return impl::load_stream_impl(*this, stream, options, encoding_wchar);
+ }
+#endif
+
+ // Parses a null-terminated string of char_t by handing it to load_buffer() with its
+ // length in bytes and the native encoding (wchar in PUGIXML_WCHAR_MODE, UTF-8 otherwise).
+ PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
+ {
+     // the encoding is fixed up front, so no autodetection takes place
+ #ifdef PUGIXML_WCHAR_MODE
+     const xml_encoding native_encoding = encoding_wchar;
+ #else
+     const xml_encoding native_encoding = encoding_utf8;
+ #endif
+
+     const size_t size_in_bytes = impl::strlength(contents) * sizeof(char_t);
+
+     return load_buffer(contents, size_in_bytes, options, native_encoding);
+ }
+
+ // Equivalent to load_string(): simply forwards both arguments.
+ PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
+ {
+ return load_string(contents, options);
+ }
+
+ // Resets the document, opens `path_` for binary reading and passes the FILE* (which
+ // may be null if the open failed) to impl::load_file_impl.
+ PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
+ {
+     reset();
+
+     FILE* input = fopen(path_, "rb");
+
+     xml_parse_result result = impl::load_file_impl(*this, input, options, encoding);
+
+     return result;
+ }
+
+ // Wide-path variant: resets the document, opens `path_` for binary reading through
+ // impl::open_file_wide and passes the FILE* to impl::load_file_impl.
+ PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
+ {
+     reset();
+
+     FILE* input = impl::open_file_wide(path_, L"rb");
+
+     xml_parse_result result = impl::load_file_impl(*this, input, options, encoding);
+
+     return result;
+ }
+
+ // Resets the document and parses `size` bytes at `contents` via impl::load_buffer_impl
+ // with both trailing boolean flags set to false.
+ PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
+ {
+     reset();
+
+     impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+     void* data = const_cast<void*>(contents);
+
+     return impl::load_buffer_impl(doc, _root, data, size, options, encoding, false, false, &_buffer);
+ }
+
+ // Resets the document and parses `size` bytes at `contents` via impl::load_buffer_impl
+ // with the trailing boolean flags set to (true, false).
+ PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
+ {
+     reset();
+
+     impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+
+     return impl::load_buffer_impl(doc, _root, contents, size, options, encoding, true, false, &_buffer);
+ }
+
+ // Resets the document and parses `size` bytes at `contents` via impl::load_buffer_impl
+ // with the trailing boolean flags set to (true, true).
+ PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
+ {
+     reset();
+
+     impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+
+     return impl::load_buffer_impl(doc, _root, contents, size, options, encoding, true, true, &_buffer);
+ }
+
+ // Serializes the whole document to `writer`. Optionally emits a byte order mark
+ // (format_write_bom, never for Latin-1) and a default XML declaration (unless
+ // format_no_declaration is set or the document already has one), then the node tree.
+ PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+ {
+     impl::xml_buffered_writer buffered_writer(writer, encoding);
+
+     const bool latin1 = (encoding == encoding_latin1);
+
+     if ((flags & format_write_bom) && !latin1)
+     {
+         // BOM always represents the codepoint U+FEFF, so just write it in native encoding
+     #ifdef PUGIXML_WCHAR_MODE
+         unsigned int bom = 0xfeff;
+         buffered_writer.write(static_cast<wchar_t>(bom));
+     #else
+         buffered_writer.write('\xef', '\xbb', '\xbf');
+     #endif
+     }
+
+     if (!(flags & format_no_declaration))
+     {
+         if (!impl::has_declaration(_root))
+         {
+             buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
+
+             // only Latin-1 output names its encoding in the declaration
+             if (latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
+
+             buffered_writer.write('?', '>');
+
+             if (!(flags & format_raw)) buffered_writer.write('\n');
+         }
+     }
+
+     impl::node_output(buffered_writer, _root, indent, flags, 0);
+ }
+
+#ifndef PUGIXML_NO_STL
+ // Narrow-stream overload: wraps `stream` in an xml_writer_stream and forwards to the
+ // xml_writer-based save().
+ PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+ {
+     xml_writer_stream stream_writer(stream);
+     save(stream_writer, indent, flags, encoding);
+ }
+
+ // Wide-stream overload: wraps `stream` in an xml_writer_stream and forwards to the
+ // xml_writer-based save(), always using encoding_wchar.
+ PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
+ {
+     xml_writer_stream stream_writer(stream);
+     save(stream_writer, indent, flags, encoding_wchar);
+ }
+#endif
+
+ // Opens `path_` for writing (text mode when format_save_file_text is set, binary
+ // otherwise) and delegates the actual saving to impl::save_file_impl.
+ PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+ {
+     const char* mode = (flags & format_save_file_text) ? "w" : "wb";
+
+     FILE* output = fopen(path_, mode);
+
+     return impl::save_file_impl(*this, output, indent, flags, encoding);
+ }
+
+ // Wide-path variant: opens `path_` through impl::open_file_wide (text mode when
+ // format_save_file_text is set, binary otherwise) and delegates to impl::save_file_impl.
+ PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+ {
+     const wchar_t* mode = (flags & format_save_file_text) ? L"w" : L"wb";
+
+     FILE* output = impl::open_file_wide(path_, mode);
+
+     return impl::save_file_impl(*this, output, indent, flags, encoding);
+ }
+
+ // Returns the first top-level child whose type is node_element, or a null handle
+ // when the document has no such child.
+ PUGI__FN xml_node xml_document::document_element() const
+ {
+     assert(_root);
+
+     xml_node_struct* child = _root->first_child;
+
+     while (child)
+     {
+         if (PUGI__NODETYPE(child) == node_element)
+             return xml_node(child);
+
+         child = child->next_sibling;
+     }
+
+     return xml_node();
+ }
+
+#ifndef PUGIXML_NO_STL
+ // Converts a null-terminated wide string via impl::as_utf8_impl. `str` must not be null.
+ PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
+ {
+     assert(str);
+
+     const size_t length = impl::strlength_wide(str);
+
+     return impl::as_utf8_impl(str, length);
+ }
+
+ // Converts a wide std::basic_string via impl::as_utf8_impl, using its stored size.
+ PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
+ {
+     const wchar_t* data = str.c_str();
+     const size_t length = str.size();
+
+     return impl::as_utf8_impl(data, length);
+ }
+
+ // Converts a null-terminated narrow string via impl::as_wide_impl. `str` must not be null.
+ PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
+ {
+     assert(str);
+
+     const size_t length = strlen(str);
+
+     return impl::as_wide_impl(str, length);
+ }
+
+ // Converts a narrow std::string via impl::as_wide_impl, using its stored size.
+ PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
+ {
+     const char* data = str.c_str();
+     const size_t length = str.size();
+
+     return impl::as_wide_impl(data, length);
+ }
+#endif
+
+ // Installs the allocation and deallocation callbacks stored in impl::xml_memory.
+ // Neither pointer is validated here.
+ PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
+ {
+ impl::xml_memory::allocate = allocate;
+ impl::xml_memory::deallocate = deallocate;
+ }
+
+ // Returns the currently installed allocation callback.
+ PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
+ {
+ return impl::xml_memory::allocate;
+ }
+
+ // Returns the currently installed deallocation callback.
+ PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
+ {
+ return impl::xml_memory::deallocate;
+ }
+}
+
+#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
+// Compiled only for MSVC / Intel C++ when STL support is enabled: one _Iter_cat overload
+// per pugixml iterator type, each reporting the bidirectional iterator category.
+namespace std
+{
+ // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
+ PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
+ {
+ return std::bidirectional_iterator_tag();
+ }
+
+ PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
+ {
+ return std::bidirectional_iterator_tag();
+ }
+
+ PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
+ {
+ return std::bidirectional_iterator_tag();
+ }
+}
+#endif
+
+#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
+// Compiled only for the Sun compiler (__SUNPRO_CC) when STL support is enabled: one
+// __iterator_category overload per pugixml iterator type, each reporting the
+// bidirectional iterator category.
+namespace std
+{
+ // Workarounds for (non-standard) iterator category detection
+ PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
+ {
+ return std::bidirectional_iterator_tag();
+ }
+
+ PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
+ {
+ return std::bidirectional_iterator_tag();
+ }
+
+ PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
+ {
+ return std::bidirectional_iterator_tag();
+ }
+}
+#endif
+
+#ifndef PUGIXML_NO_XPATH
+// STL replacements
+PUGI__NS_BEGIN
+ // Comparison functor: applies operator== to two values of the same type.
+ struct equal_to
+ {
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const
+ {
+ return lhs == rhs;
+ }
+ };
+
+ // Comparison functor: applies operator!= to two values of the same type.
+ struct not_equal_to
+ {
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const
+ {
+ return lhs != rhs;
+ }
+ };
+
+ // Comparison functor: applies operator< to two values of the same type.
+ struct less
+ {
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const
+ {
+ return lhs < rhs;
+ }
+ };
+
+ // Comparison functor: applies operator<= to two values of the same type.
+ struct less_equal
+ {
+ template <typename T> bool operator()(const T& lhs, const T& rhs) const
+ {
+ return lhs <= rhs;
+ }
+ };
+
+ // Exchanges two values using one copy and two assignments.
+ template <typename T> void swap(T& lhs, T& rhs)
+ {
+     T held = lhs;
+
+     lhs = rhs;
+     rhs = held;
+ }
+
+ // Returns an iterator to the first element of [begin, end) that `pred` never ranks
+ // after a later element (pred(a, b) means "a goes before b").
+ // For an empty range returns `end`.
+ template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
+ {
+     // an empty range has no minimum; stop before forming begin + 1, which would step
+     // past the end and make the loop below run away
+     if (begin == end) return end;
+
+     I result = begin;
+
+     for (I it = begin + 1; it != end; ++it)
+         if (pred(*it, *result))
+             result = it;
+
+     return result;
+ }
+
+ // Reverses [begin, end) in place by swapping elements from both ends towards the middle.
+ template <typename I> void reverse(I begin, I end)
+ {
+     while (end - begin > 1)
+     {
+         --end;
+         swap(*begin, *end);
+         ++begin;
+     }
+ }
+
+ // Collapses runs of adjacent equal elements in [begin, end) so that one element of
+ // each run remains, and returns the new past-the-end position.
+ template <typename I> I unique(I begin, I end)
+ {
+     // skip the leading part that contains no adjacent duplicates
+     for (; end - begin > 1; ++begin)
+         if (!(*begin != *(begin + 1)))
+             break;
+
+     if (begin == end) return begin;
+
+     // `last` tracks the most recently kept element
+     I last = begin;
+
+     for (I cur = begin + 1; cur != end; ++cur)
+     {
+         if (*cur != *last)
+         {
+             ++last;
+             *last = *cur;
+         }
+     }
+
+     // past-the-end (last points to a live element)
+     return last + 1;
+ }
+
+ // Copies [begin, end) so that the copy ends just before `target`, working from the
+ // last element to the first.
+ template <typename I> void copy_backwards(I begin, I end, I target)
+ {
+     while (begin != end)
+     {
+         --end;
+         --target;
+         *target = *end;
+     }
+ }
+
+ // Sorts the non-empty range [begin, end) in place by insertion, ordering with `pred`.
+ // The trailing T* argument is unused at run time; it only lets the element type T be
+ // deduced for the temporary `val`.
+ template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
+ {
+ assert(begin != end);
+
+ for (I it = begin + 1; it != end; ++it)
+ {
+ T val = *it;
+
+ if (pred(val, *begin))
+ {
+ // move to front
+ // (shift [begin, it) up by one slot, then drop val into the first position)
+ copy_backwards(begin, it, it + 1);
+ *begin = val;
+ }
+ else
+ {
+ I hole = it;
+
+ // move hole backwards
+ // (no lower-bound check is needed: val does not order before *begin in this
+ // branch, so the scan stops at begin at the latest)
+ while (pred(val, *(hole - 1)))
+ {
+ *hole = *(hole - 1);
+ hole--;
+ }
+
+ // fill hole with element
+ *hole = val;
+ }
+ }
+ }
+
+ // std variant for elements with ==
+ // Three-way partition of [begin, end) around the value at `middle`: elements ordered
+ // before the pivot end up in front, elements equal to it (by operator==) in the middle,
+ // the rest behind. The bounds of the equal range are written to *out_eqbeg / *out_eqend.
+ // `middle` must point at an element, i.e. the range must be non-empty.
+ template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
+ {
+ I eqbeg = middle, eqend = middle + 1;
+
+ // expand equal range
+ while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
+ while (eqend != end && *eqend == *eqbeg) ++eqend;
+
+ // process outer elements
+ // ([ltend, eqbeg) and [eqend, gtbeg) are the already classified parts on either side)
+ I ltend = eqbeg, gtbeg = eqend;
+
+ for (;;)
+ {
+ // find the element from the right side that belongs to the left one
+ for (; gtbeg != end; ++gtbeg)
+ if (!pred(*eqbeg, *gtbeg))
+ {
+ if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
+ else break;
+ }
+
+ // find the element from the left side that belongs to the right one
+ for (; ltend != begin; --ltend)
+ if (!pred(*(ltend - 1), *eqbeg))
+ {
+ if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
+ else break;
+ }
+
+ // scanned all elements
+ if (gtbeg == end && ltend == begin)
+ {
+ *out_eqbeg = eqbeg;
+ *out_eqend = eqend;
+ return;
+ }
+
+ // make room for elements by moving equal area
+ if (gtbeg == end)
+ {
+ // only a misplaced left element remains: slide the equal range one slot left
+ if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
+ swap(*eqbeg, *--eqend);
+ }
+ else if (ltend == begin)
+ {
+ // only a misplaced right element remains: slide the equal range one slot right
+ if (eqend != gtbeg) swap(*eqbeg, *eqend);
+ ++eqend;
+ swap(*gtbeg++, *eqbeg++);
+ }
+ // one misplaced element on each side: exchange them directly
+ else swap(*gtbeg++, *--ltend);
+ }
+ }
+
+ // Orders the three referenced elements with `pred` using up to three swaps, so that
+ // afterwards *first, *middle, *last are in order and *middle holds the median.
+ template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
+ {
+ if (pred(*middle, *first)) swap(*middle, *first);
+ if (pred(*last, *middle)) swap(*last, *middle);
+ if (pred(*middle, *first)) swap(*middle, *first);
+ }
+
+ // Moves a pivot estimate into *middle: a median of three for spans of at most 40
+ // (last - first), otherwise a median of three medians taken from the start, middle and
+ // end of the span. `last` refers to the final element itself (it is dereferenced).
+ template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
+ {
+ if (last - first <= 40)
+ {
+ // median of three for small chunks
+ median3(first, middle, last, pred);
+ }
+ else
+ {
+ // median of nine
+ size_t step = (last - first + 1) / 8;
+
+ median3(first, first + step, first + 2 * step, pred);
+ median3(middle - step, middle, middle + step, pred);
+ median3(last - 2 * step, last - step, last, pred);
+ // combine the three local medians; the overall estimate ends up in *middle
+ median3(first + step, middle, last - step, pred);
+ }
+ }
+
+ // Sorts [begin, end) in place according to `pred`. Ranges longer than 32 elements are
+ // split with median() + partition(); the remaining short range is finished with
+ // insertion_sort(). An empty range is left untouched.
+ template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
+ {
+ // sort large chunks
+ while (end - begin > 32)
+ {
+ // find median element
+ I middle = begin + (end - begin) / 2;
+ median(begin, middle, end - 1, pred);
+
+ // partition in three chunks (< = >)
+ I eqbeg, eqend;
+ partition(begin, middle, end, pred, &eqbeg, &eqend);
+
+ // loop on larger half
+ // (the smaller side is handled by the recursive call, which bounds recursion depth)
+ if (eqbeg - begin > end - eqend)
+ {
+ sort(eqend, end, pred);
+ end = eqbeg;
+ }
+ else
+ {
+ sort(begin, eqbeg, pred);
+ begin = eqend;
+ }
+ }
+
+ // insertion sort small chunk
+ // (&*begin only supplies the element type to insertion_sort)
+ if (begin != end) insertion_sort(begin, end, pred, &*begin);
+ }
+PUGI__NS_END
+
+// Allocator used for AST and evaluation stacks
+PUGI__NS_BEGIN
+ // One block in the singly linked chain used by xpath_allocator.
+ struct xpath_memory_block
+ {
+ // next block in the chain (the block that was current before this one), or null
+ xpath_memory_block* next;
+ // number of usable bytes in `data`; blocks allocated by xpath_allocator may be
+ // larger than the declared array size
+ size_t capacity;
+
+ // payload storage; the declared size is PUGIXML_MEMORY_XPATH_PAGE_SIZE when that
+ // macro is defined and 4096 bytes otherwise
+ char data[
+ #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
+ PUGIXML_MEMORY_XPATH_PAGE_SIZE
+ #else
+ 4096
+ #endif
+ ];
+ };
+
+ // Bump allocator over a chain of xpath_memory_block objects. Allocation carves bytes
+ // from the current block (_root) and starts a new block when it is full. Memory is given
+ // back in bulk through revert()/release(); single objects are never freed, although the
+ // most recent one can be resized with reallocate().
+ class xpath_allocator
+ {
+ // current (most recently added) block
+ xpath_memory_block* _root;
+ // number of bytes already handed out from _root->data
+ size_t _root_size;
+
+ public:
+ #ifdef PUGIXML_NO_EXCEPTIONS
+ // jump target used by allocate() to report failure when exceptions are disabled
+ jmp_buf* error_handler;
+ #endif
+
+ // Starts allocating from `root`, treating the first `root_size` bytes as already used.
+ xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
+ {
+ #ifdef PUGIXML_NO_EXCEPTIONS
+ error_handler = 0;
+ #endif
+ }
+
+ // Allocates `size` bytes (rounded up to a multiple of sizeof(void*)).
+ // Returns null if a new block is needed and xml_memory::allocate fails.
+ void* allocate_nothrow(size_t size)
+ {
+ // align size so that we're able to store pointers in subsequent blocks
+ size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+
+ if (_root_size + size <= _root->capacity)
+ {
+ // fits into the current block: bump the used size
+ void* buf = _root->data + _root_size;
+ _root_size += size;
+ return buf;
+ }
+ else
+ {
+ // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
+ size_t block_capacity_base = sizeof(_root->data);
+ size_t block_capacity_req = size + block_capacity_base / 4;
+ size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
+
+ // total allocation = block header (everything before `data`) + payload
+ size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
+
+ xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
+ if (!block) return 0;
+
+ // link the new block in front of the chain and make it current
+ block->next = _root;
+ block->capacity = block_capacity;
+
+ _root = block;
+ _root_size = size;
+
+ return block->data;
+ }
+ }
+
+ // Like allocate_nothrow(), but never returns null: on failure it longjmps to
+ // *error_handler (PUGIXML_NO_EXCEPTIONS) or throws std::bad_alloc.
+ void* allocate(size_t size)
+ {
+ void* result = allocate_nothrow(size);
+
+ if (!result)
+ {
+ #ifdef PUGIXML_NO_EXCEPTIONS
+ assert(error_handler);
+ longjmp(*error_handler, 1);
+ #else
+ throw std::bad_alloc();
+ #endif
+ }
+
+ return result;
+ }
+
+ // Resizes the most recently allocated object from old_size to new_size bytes
+ // (new_size must not be smaller once both are rounded up). `ptr` may be null, in which
+ // case this is a plain allocation. Returns the possibly moved object.
+ void* reallocate(void* ptr, size_t old_size, size_t new_size)
+ {
+ // align size so that we're able to store pointers in subsequent blocks
+ old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+ new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+
+ // we can only reallocate the last object
+ assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _root->data + _root_size);
+
+ // adjust root size so that we have not allocated the object at all
+ bool only_object = (_root_size == old_size);
+
+ if (ptr) _root_size -= old_size;
+
+ // allocate a new version (this will obviously reuse the memory if possible)
+ void* result = allocate(new_size);
+ assert(result);
+
+ // we have a new block
+ if (result != ptr && ptr)
+ {
+ // copy old data
+ assert(new_size >= old_size);
+ memcpy(result, ptr, old_size);
+
+ // free the previous page if it had no other objects
+ if (only_object)
+ {
+ assert(_root->data == result);
+ assert(_root->next);
+
+ xpath_memory_block* next = _root->next->next;
+
+ if (next)
+ {
+ // deallocate the whole page, unless it was the first one
+ xml_memory::deallocate(_root->next);
+ _root->next = next;
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // Rolls the allocator back to a previously copied `state`: every block added since
+ // then is deallocated and the current block / used size are restored.
+ void revert(const xpath_allocator& state)
+ {
+ // free all new pages
+ xpath_memory_block* cur = _root;
+
+ while (cur != state._root)
+ {
+ xpath_memory_block* next = cur->next;
+
+ xml_memory::deallocate(cur);
+
+ cur = next;
+ }
+
+ // restore state
+ _root = state._root;
+ _root_size = state._root_size;
+ }
+
+ // Deallocates every block in the chain except the last one (the block whose `next`
+ // is null), which this allocator did not allocate itself.
+ void release()
+ {
+ xpath_memory_block* cur = _root;
+ assert(cur);
+
+ while (cur->next)
+ {
+ xpath_memory_block* next = cur->next;
+
+ xml_memory::deallocate(cur);
+
+ cur = next;
+ }
+ }
+ };
+
+ // Scope guard for an xpath_allocator: copies the allocator's state on construction and
+ // reverts the allocator to that state on destruction, releasing whatever was allocated
+ // in between.
+ struct xpath_allocator_capture
+ {
+ xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
+ {
+ }
+
+ ~xpath_allocator_capture()
+ {
+ _target->revert(_state);
+ }
+
+ // allocator being guarded
+ xpath_allocator* _target;
+ // copy of the allocator taken at construction time
+ xpath_allocator _state;
+ };
+
+ // Pair of allocator pointers passed around together; xpath_stack_data points them at
+ // its `result` and `temp` allocators.
+ struct xpath_stack
+ {
+ xpath_allocator* result;
+ xpath_allocator* temp;
+ };
+
+ // Owns the storage behind an xpath_stack: two embedded memory blocks, one allocator
+ // per block, and the xpath_stack that points at both allocators.
+ struct xpath_stack_data
+ {
+ // initial blocks: blocks[0] backs `result`, blocks[1] backs `temp`
+ xpath_memory_block blocks[2];
+ xpath_allocator result;
+ xpath_allocator temp;
+ xpath_stack stack;
+
+ #ifdef PUGIXML_NO_EXCEPTIONS
+ // shared jump buffer handed to both allocators for failure reporting
+ jmp_buf error_handler;
+ #endif
+
+ xpath_stack_data(): result(blocks + 0), temp(blocks + 1)
+ {
+ // the embedded blocks terminate their chains and offer the full declared capacity
+ blocks[0].next = blocks[1].next = 0;
+ blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
+
+ stack.result = &result;
+ stack.temp = &temp;
+
+ #ifdef PUGIXML_NO_EXCEPTIONS
+ result.error_handler = temp.error_handler = &error_handler;
+ #endif
+ }
+
+ // frees the blocks both allocators added beyond the embedded ones
+ ~xpath_stack_data()
+ {
+ result.release();
+ temp.release();
+ }
+ };
+PUGI__NS_END
+
+// String class
+PUGI__NS_BEGIN
+ // String value used by the XPath code. The buffer is either borrowed (constant) or
+ // obtained from an xpath_allocator ("heap"); heap strings cache their length.
+ class xpath_string
+ {
+     const char_t* _buffer;
+     bool _uses_heap;
+     size_t _length_heap;
+
+     // copies length characters of string into newly allocated, zero-terminated storage
+     static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
+     {
+         void* memory = alloc->allocate((length + 1) * sizeof(char_t));
+         char_t* copy = static_cast<char_t*>(memory);
+         assert(copy);
+
+         memcpy(copy, string, length * sizeof(char_t));
+         copy[length] = 0;
+
+         return copy;
+     }
+
+     // raw member-wise constructor used by the named factories below
+     xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
+     {
+     }
+
+ public:
+     // empty string backed by a constant literal
+     xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
+     {
+     }
+
+     // wraps str without copying it
+     static xpath_string from_const(const char_t* str)
+     {
+         return xpath_string(str, false, 0);
+     }
+
+     // wraps an existing zero-terminated range [begin, end] as a heap string without copying it
+     static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
+     {
+         assert(begin <= end && *end == 0);
+
+         const size_t length = static_cast<size_t>(end - begin);
+
+         return xpath_string(begin, true, length);
+     }
+
+     // copies [begin, end) into allocator memory; an empty range yields the default empty string
+     static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
+     {
+         assert(begin <= end);
+
+         const size_t length = static_cast<size_t>(end - begin);
+
+         if (length == 0) return xpath_string();
+
+         return xpath_string(duplicate_string(begin, length, alloc), true, length);
+     }
+
+     // appends o to this string, allocating from alloc when a copy is required
+     void append(const xpath_string& o, xpath_allocator* alloc)
+     {
+         // nothing to do for an empty source
+         if (!*o._buffer) return;
+
+         // empty constant target and constant source: just share the source buffer
+         if (!*_buffer && !_uses_heap && !o._uses_heap)
+         {
+             _buffer = o._buffer;
+             return;
+         }
+
+         // otherwise the concatenation has to live in allocator memory
+         const size_t old_length = length();
+         const size_t extra_length = o.length();
+         const size_t new_length = old_length + extra_length;
+
+         // grow the current heap buffer; a non-heap target passes no old pointer
+         char_t* old_heap = _uses_heap ? const_cast<char_t*>(_buffer) : 0;
+         char_t* merged = static_cast<char_t*>(alloc->reallocate(old_heap, (old_length + 1) * sizeof(char_t), (new_length + 1) * sizeof(char_t)));
+         assert(merged);
+
+         // a non-heap target was not handed to reallocate, so its characters are copied here
+         if (!_uses_heap) memcpy(merged, _buffer, old_length * sizeof(char_t));
+
+         // place the source right after the target and terminate
+         memcpy(merged + old_length, o._buffer, extra_length * sizeof(char_t));
+         merged[new_length] = 0;
+
+         _buffer = merged;
+         _uses_heap = true;
+         _length_heap = new_length;
+     }
+
+     const char_t* c_str() const
+     {
+         return _buffer;
+     }
+
+     // heap strings report the cached length, constant strings are measured on demand
+     size_t length() const
+     {
+         if (_uses_heap) return _length_heap;
+
+         return strlength(_buffer);
+     }
+
+     // returns a writable buffer, converting a constant string into a heap copy first
+     char_t* data(xpath_allocator* alloc)
+     {
+         if (_uses_heap) return const_cast<char_t*>(_buffer);
+
+         const size_t current_length = strlength(_buffer);
+
+         _buffer = duplicate_string(_buffer, current_length, alloc);
+         _uses_heap = true;
+         _length_heap = current_length;
+
+         return const_cast<char_t*>(_buffer);
+     }
+
+     bool empty() const
+     {
+         return !*_buffer;
+     }
+
+     bool operator==(const xpath_string& o) const
+     {
+         return strequal(_buffer, o._buffer);
+     }
+
+     bool operator!=(const xpath_string& o) const
+     {
+         return !(*this == o);
+     }
+
+     bool uses_heap() const
+     {
+         return _uses_heap;
+     }
+ };
+PUGI__NS_END
+
+PUGI__NS_BEGIN
+ // Returns true if string begins with pattern (an empty pattern always matches)
+ PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
+ {
+     for (; *pattern; ++string, ++pattern)
+     {
+         // covers both a differing character and string ending before pattern does
+         if (*string != *pattern) return false;
+     }
+
+     return true;
+ }
+
+ // Locates character c in the zero-terminated string s.
+ // Forwards to wcschr when PUGIXML_WCHAR_MODE is defined and to strchr otherwise,
+ // so the result follows those functions (null pointer when c is not found).
+ PUGI__FN const char_t* find_char(const char_t* s, char_t c)
+ {
+ #ifdef PUGIXML_WCHAR_MODE
+ return wcschr(s, c);
+ #else
+ return strchr(s, c);
+ #endif
+ }
+
+ // Locates the first occurrence of p inside s (wcsstr/strstr depending on the character mode)
+ PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
+ {
+ #ifdef PUGIXML_WCHAR_MODE
+     // MSVC6 wcsstr bug workaround: an empty pattern is answered here instead of being passed to wcsstr
+     if (*p == 0) return s;
+
+     return wcsstr(s, p);
+ #else
+     return strstr(s, p);
+ #endif
+ }
+
+ // Maps ASCII 'A'..'Z' to 'a'..'z'; every other character is returned unchanged
+ PUGI__FN char_t tolower_ascii(char_t ch)
+ {
+     // unsigned wrap-around makes everything outside 'A'..'Z' fail the range test
+     if (static_cast<unsigned int>(ch - 'A') < 26)
+         return static_cast<char_t>(ch | ' '); // setting the ' ' bit turns an upper case letter into lower case
+
+     return ch;
+ }
+
+ // Computes the string value of an XPath node:
+ // - attribute: its value
+ // - pcdata/cdata/comment/pi node: its value
+ // - document/element node: concatenation of all descendant pcdata/cdata values
+ // - anything else: empty string
+ PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
+ {
+     if (na.attribute())
+         return xpath_string::from_const(na.attribute().value());
+
+     xml_node root = na.node();
+
+     switch (root.type())
+     {
+     case node_pcdata:
+     case node_cdata:
+     case node_comment:
+     case node_pi:
+         return xpath_string::from_const(root.value());
+
+     case node_document:
+     case node_element:
+     {
+         xpath_string text;
+
+         // depth-first walk over the subtree below root
+         xml_node it = root.first_child();
+
+         while (it && it != root)
+         {
+             if (it.type() == node_pcdata || it.type() == node_cdata)
+                 text.append(xpath_string::from_const(it.value()), alloc);
+
+             // descend first
+             if (it.first_child())
+             {
+                 it = it.first_child();
+                 continue;
+             }
+
+             // then move sideways
+             if (it.next_sibling())
+             {
+                 it = it.next_sibling();
+                 continue;
+             }
+
+             // otherwise climb until an ancestor has a next sibling or root is reached
+             while (!it.next_sibling() && it != root)
+                 it = it.parent();
+
+             if (it != root) it = it.next_sibling();
+         }
+
+         return text;
+     }
+
+     default:
+         return xpath_string();
+     }
+ }
+
+ // Decides whether ln precedes rn; both nodes must have the same parent pointer
+ PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
+ {
+     assert(ln->parent == rn->parent);
+
+     // there is no common ancestor (the shared parent is null), nodes are from different documents
+     if (!ln->parent) return ln < rn;
+
+     // walk both sibling chains forward in lock step; whichever walk reaches the other node first decides
+     xml_node_struct* left_it = ln;
+     xml_node_struct* right_it = rn;
+
+     for (; left_it && right_it; left_it = left_it->next_sibling, right_it = right_it->next_sibling)
+     {
+         if (left_it == rn) return true;
+         if (right_it == ln) return false;
+     }
+
+     // if rn sibling chain ended ln must be before rn
+     return right_it == 0;
+ }
+
+ // Decides whether ln precedes rn by locating the pair of ancestors that share a parent
+ // and comparing those with node_is_before_sibling
+ PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
+ {
+     // climb from both nodes in lock step until the parents match or one side runs out
+     xml_node_struct* left_probe = ln;
+     xml_node_struct* right_probe = rn;
+
+     for (; left_probe && right_probe; left_probe = left_probe->parent, right_probe = right_probe->parent)
+     {
+         if (left_probe->parent == right_probe->parent) break;
+     }
+
+     // parents are the same!
+     if (left_probe && right_probe) return node_is_before_sibling(left_probe, right_probe);
+
+     // nodes are at different depths, need to normalize heights
+     const bool left_higher = !left_probe;
+
+     // whatever is left of a probe is the depth difference; move the deeper node up by that much
+     for (; left_probe; left_probe = left_probe->parent)
+         ln = ln->parent;
+
+     for (; right_probe; right_probe = right_probe->parent)
+         rn = rn->parent;
+
+     // one node is the ancestor of the other
+     if (ln == rn) return left_higher;
+
+     // both nodes are now at the same depth: climb together until the parents match
+     while (ln->parent != rn->parent)
+     {
+         ln = ln->parent;
+         rn = rn->parent;
+     }
+
+     return node_is_before_sibling(ln, rn);
+ }
+
+ // Returns true if parent is non-null and is either node itself or found on node's parent chain
+ PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
+ {
+     if (!parent) return false;
+
+     for (xml_node_struct* it = node; it; it = it->parent)
+     {
+         if (it == parent) return true;
+     }
+
+     return false;
+ }
+
+ // Returns a pointer usable as a cheap ordering key for xnode, or null if none is available.
+ // The key is the name or value pointer of the node/attribute, and is only offered when the
+ // document is not flagged as sharing contents and the chosen string is not flagged as
+ // allocated or shared.
+ PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
+ {
+     xml_node_struct* node = xnode.node().internal_object();
+
+     if (node)
+     {
+         if ((get_document(node).header & xml_memory_page_contents_shared_mask) != 0) return 0;
+
+         const bool name_usable = node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0;
+         if (name_usable) return node->name;
+
+         const bool value_usable = node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0;
+         if (value_usable) return node->value;
+
+         return 0;
+     }
+
+     xml_attribute_struct* attr = xnode.attribute().internal_object();
+
+     if (attr)
+     {
+         if ((get_document(attr).header & xml_memory_page_contents_shared_mask) != 0) return 0;
+
+         // unlike nodes, the attribute pointers are not checked for null here
+         if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
+         if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
+
+         return 0;
+     }
+
+     return 0;
+ }
+
+ // Strict "comes before" predicate used to order xpath nodes
+ struct document_order_comparator
+ {
+     bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
+     {
+         // fast path: compare buffer-based ordering keys when both sides provide one
+         const void* left_key = document_buffer_order(lhs);
+         const void* right_key = document_buffer_order(rhs);
+
+         if (left_key && right_key) return left_key < right_key;
+
+         // slow path: structural comparison
+         xml_node ln = lhs.node();
+         xml_node rn = rhs.node();
+
+         if (lhs.attribute())
+         {
+             if (rhs.attribute())
+             {
+                 // two attributes of the same element: lhs is reported as before rhs
+                 // if rhs is reachable by walking forward from lhs
+                 if (lhs.parent() == rhs.parent())
+                 {
+                     xml_attribute a = lhs.attribute();
+
+                     while (a)
+                     {
+                         if (a == rhs.attribute()) return true;
+
+                         a = a.next_attribute();
+                     }
+
+                     return false;
+                 }
+
+                 // attributes of different elements: compare the owning elements instead
+                 ln = lhs.parent();
+                 rn = rhs.parent();
+             }
+             else
+             {
+                 // attributes go after the parent element
+                 if (lhs.parent() == rhs.node()) return false;
+
+                 ln = lhs.parent();
+             }
+         }
+         else if (rhs.attribute())
+         {
+             // attributes go after the parent element
+             if (rhs.parent() == lhs.node()) return true;
+
+             rn = rhs.parent();
+         }
+
+         if (ln == rn) return false;
+
+         // at least one side has no node: fall back to handle comparison
+         if (!ln || !rn) return ln < rn;
+
+         return node_is_before(ln.internal_object(), rn.internal_object());
+     }
+ };
+
+ // Ordering used before duplicate removal: attribute entries sort before node entries,
+ // and entries of the same kind are ordered by their handles
+ struct duplicate_comparator
+ {
+     bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
+     {
+         if (lhs.attribute())
+         {
+             // attribute vs node: the attribute comes first
+             if (!rhs.attribute()) return true;
+
+             return lhs.attribute() < rhs.attribute();
+         }
+
+         // node vs attribute: the attribute comes first
+         if (rhs.attribute()) return false;
+
+         return lhs.node() < rhs.node();
+     }
+ };
+
+ // Produces a NaN value.
+ // When the float format checks below pass (IEC 559 announced, or radix 2 / max exponent 128 /
+ // 24 mantissa digits), the value is built from the bit pattern 0x7fc00000; otherwise it is
+ // computed as 0.0 / 0.0.
+ PUGI__FN double gen_nan()
+ {
+ #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
+ // the array bound becomes -1 (a compile error) if float and uint32_t differ in size
+ union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];
+ u[0].i = 0x7fc00000;
+ return u[0].f;
+ #else
+ // fallback
+ // volatile keeps the division from being evaluated at compile time
+ const volatile double zero = 0.0;
+ return zero / zero;
+ #endif
+ }
+
+ // Tests whether value is NaN, using whichever classification facility the toolchain offers
+ PUGI__FN bool is_nan(double value)
+ {
+ #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
+     return _isnan(value) != 0;
+ #elif defined(fpclassify) && defined(FP_NAN)
+     return FP_NAN == fpclassify(value);
+ #else
+     // fallback: NaN is the only value that does not compare equal to itself
+     const volatile double probe = value;
+     return !(probe == probe);
+ #endif
+ }
+
+ // Returns the fixed spelling for zero, NaN and the infinities ("0", "NaN", "Infinity",
+ // "-Infinity"), or null when value is an ordinary number that needs real formatting
+ PUGI__FN const char_t* convert_number_to_string_special(double value)
+ {
+ #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
+     if (_finite(value))
+     {
+         if (value == 0) return PUGIXML_TEXT("0");
+
+         return 0;
+     }
+
+     if (_isnan(value)) return PUGIXML_TEXT("NaN");
+
+     if (value > 0) return PUGIXML_TEXT("Infinity");
+
+     return PUGIXML_TEXT("-Infinity");
+ #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
+     const int category = fpclassify(value);
+
+     if (category == FP_NAN) return PUGIXML_TEXT("NaN");
+
+     if (category == FP_INFINITE)
+     {
+         if (value > 0) return PUGIXML_TEXT("Infinity");
+
+         return PUGIXML_TEXT("-Infinity");
+     }
+
+     if (category == FP_ZERO) return PUGIXML_TEXT("0");
+
+     return 0;
+ #else
+     // fallback
+     const volatile double probe = value;
+
+     if (probe == 0) return PUGIXML_TEXT("0");
+     if (probe != probe) return PUGIXML_TEXT("NaN");
+
+     // at this point (non-zero, not NaN) only an infinity stays the same when doubled
+     if (probe * 2 == probe)
+     {
+         if (value > 0) return PUGIXML_TEXT("Infinity");
+
+         return PUGIXML_TEXT("-Infinity");
+     }
+
+     return 0;
+ #endif
+ }
+
+ // Number to boolean conversion: zero and NaN are false, everything else is true
+ PUGI__FN bool convert_number_to_boolean(double value)
+ {
+     if (value == 0) return false;
+
+     return !is_nan(value);
+ }
+
+ // Drops trailing '0' characters from [begin, end) by writing a terminator after the last
+ // character that is kept; the terminator is written at end itself if nothing is dropped
+ PUGI__FN void truncate_zeros(char* begin, char* end)
+ {
+     char* cut = end;
+
+     while (cut != begin && *(cut - 1) == '0') --cut;
+
+     *cut = 0;
+ }
+
+ // Splits value into decimal mantissa digits and a decimal exponent.
+ // The mantissa is returned as a zero-terminated digit string inside buffer, to be read as
+ // 0.xxxxx with the leading "0." implied; trailing zero digits are removed.
+ // Two implementations exist: one based on _ecvt_s for MSVC CRT version 1400 and later
+ // (excluding Windows CE), and a sprintf-based one for everything else.
+#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
+ PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
+ {
+ // get base values
+ // (the sign output is required by the call but not used afterwards)
+ int sign, exponent;
+ _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
+
+ // truncate redundant zeros
+ truncate_zeros(buffer, buffer + strlen(buffer));
+
+ // fill results
+ *out_mantissa = buffer;
+ *out_exponent = exponent;
+ }
+#else
+ PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
+ {
+ // get a scientific notation value with IEEE DBL_DIG decimals
+ // NOTE(review): the write is unbounded; buffer_size is only checked afterwards by the assert
+ sprintf(buffer, "%.*e", DBL_DIG, value);
+ assert(strlen(buffer) < buffer_size);
+ // keeps buffer_size referenced when the assert is compiled out
+ (void)!buffer_size;
+
+ // get the exponent (possibly negative)
+ char* exponent_string = strchr(buffer, 'e');
+ assert(exponent_string);
+
+ int exponent = atoi(exponent_string + 1);
+
+ // extract mantissa string: skip sign
+ char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
+ // callers are expected to have filtered out zero already, so the leading digit is non-zero
+ assert(mantissa[0] != '0' && mantissa[1] == '.');
+
+ // divide mantissa by 10 to eliminate integer part
+ // (the leading digit overwrites the '.', so "d.ddd" becomes "dddd" starting one character later)
+ mantissa[1] = mantissa[0];
+ mantissa++;
+ exponent++;
+
+ // remove extra mantissa digits and zero-terminate mantissa
+ // (the terminator lands at or before the 'e', cutting off the exponent text)
+ truncate_zeros(mantissa, exponent_string);
+
+ // fill results
+ *out_mantissa = mantissa;
+ *out_exponent = exponent;
+ }
+#endif
+
+ // Formats value as a plain decimal string without exponent notation.
+ // Special values (zero, NaN, infinities) come from convert_number_to_string_special; other
+ // values are assembled from the mantissa digits and exponent into memory taken from alloc.
+ PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
+ {
+ // try special number conversion
+ const char_t* special = convert_number_to_string_special(value);
+ if (special) return xpath_string::from_const(special);
+
+ // get mantissa + exponent form
+ char mantissa_buffer[32];
+
+ char* mantissa;
+ int exponent;
+ convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
+
+ // allocate a buffer of suitable length for the number
+ // (digits + padding zeros implied by the exponent + 4 extra characters for sign, leading zero, decimal point and terminator)
+ size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
+ char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
+ assert(result);
+
+ // make the number!
+ char_t* s = result;
+
+ // sign
+ if (value < 0) *s++ = '-';
+
+ // integer part
+ if (exponent <= 0)
+ {
+ *s++ = '0';
+ }
+ else
+ {
+ // emit 'exponent' digits, padding with zeros once the mantissa digits run out
+ while (exponent > 0)
+ {
+ assert(*mantissa == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*mantissa) - '0') <= 9);
+ *s++ = *mantissa ? *mantissa++ : '0';
+ exponent--;
+ }
+ }
+
+ // fractional part
+ if (*mantissa)
+ {
+ // decimal point
+ *s++ = '.';
+
+ // extra zeroes from negative exponent
+ while (exponent < 0)
+ {
+ *s++ = '0';
+ exponent++;
+ }
+
+ // extra mantissa digits
+ while (*mantissa)
+ {
+ assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
+ *s++ = *mantissa++;
+ }
+ }
+
+ // zero-terminate
+ assert(s < result + result_size);
+ *s = 0;
+
+ // the buffer already lives in allocator memory, so it is wrapped without another copy
+ return xpath_string::from_heap_preallocated(result, s);
+ }
+
+ // Validates that string has the shape: whitespace* '-'? (digits ('.' digits*)? | '.' digits) whitespace*
+ PUGI__FN bool check_string_to_number_format(const char_t* string)
+ {
+     const char_t* p = string;
+
+     // leading whitespace
+     while (PUGI__IS_CHARTYPE(*p, ct_space)) ++p;
+
+     // optional minus sign
+     if (*p == '-') ++p;
+
+     // nothing left to form a number
+     if (*p == 0) return false;
+
+     // the number starts either with a digit, or with '.' immediately followed by a digit
+     if (!PUGI__IS_CHARTYPEX(p[0], ctx_digit))
+     {
+         if (p[0] != '.') return false;
+         if (!PUGI__IS_CHARTYPEX(p[1], ctx_digit)) return false;
+     }
+
+     // integer digits
+     while (PUGI__IS_CHARTYPEX(*p, ctx_digit)) ++p;
+
+     // optional decimal point with fraction digits
+     if (*p == '.')
+     {
+         ++p;
+
+         while (PUGI__IS_CHARTYPEX(*p, ctx_digit)) ++p;
+     }
+
+     // trailing whitespace
+     while (PUGI__IS_CHARTYPE(*p, ct_space)) ++p;
+
+     // anything left over makes the string invalid
+     return *p == 0;
+ }
+
+ // Converts a zero-terminated string to a number; strings that fail the format check yield NaN
+ PUGI__FN double convert_string_to_number(const char_t* string)
+ {
+     if (check_string_to_number_format(string))
+     {
+         // the format is known to be valid, so the C library does the actual parsing
+ #ifdef PUGIXML_WCHAR_MODE
+         return wcstod(string, 0);
+ #else
+         return atof(string);
+ #endif
+     }
+
+     return gen_nan();
+ }
+
+ // Converts the character range [begin, end) to a number and stores it in *out_result.
+ // The range is first copied into a zero-terminated scratch area: the caller's buffer when it
+ // fits, a temporary xml_memory allocation otherwise. Returns false only if that allocation fails.
+ PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
+ {
+     const size_t length = static_cast<size_t>(end - begin);
+     const size_t buffer_capacity = sizeof(buffer) / sizeof(buffer[0]);
+
+     // the terminator needs one slot as well, hence >=
+     const bool needs_heap = length >= buffer_capacity;
+
+     char_t* scratch = needs_heap ? static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))) : buffer;
+     if (!scratch) return false;
+
+     // copy string to zero-terminated buffer and perform conversion
+     memcpy(scratch, begin, length * sizeof(char_t));
+     scratch[length] = 0;
+
+     *out_result = convert_string_to_number(scratch);
+
+     // release the temporary copy, if one was made
+     if (needs_heap) xml_memory::deallocate(scratch);
+
+     return true;
+ }
+
+ // Rounds to the nearest integer by taking floor(value + 0.5)
+ PUGI__FN double round_nearest(double value)
+ {
+     const double shifted = value + 0.5;
+
+     return floor(shifted);
+ }
+
+ // Same as round_nearest, but returns -0 for [-0.5, -0]
+ PUGI__FN double round_nearest_nzero(double value)
+ {
+     // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
+     if (value >= -0.5 && value <= 0)
+         return ceil(value);
+
+     return floor(value + 0.5);
+ }
+
+ // Returns the full name of the attribute held by node, or of its xml node if there is no attribute
+ PUGI__FN const char_t* qualified_name(const xpath_node& node)
+ {
+     if (node.attribute())
+         return node.attribute().name();
+
+     return node.node().name();
+ }
+
+ // Returns the part of the qualified name after the first ':', or the whole name if it has no ':'
+ PUGI__FN const char_t* local_name(const xpath_node& node)
+ {
+     const char_t* full_name = qualified_name(node);
+     const char_t* colon = find_char(full_name, ':');
+
+     if (!colon) return full_name;
+
+     return colon + 1;
+ }
+
+ // Attribute predicate that recognizes the xmlns declaration matching the prefix of a given name:
+ // "xmlns:<prefix>" when the name has a prefix, plain "xmlns" otherwise
+ struct namespace_uri_predicate
+ {
+     const char_t* prefix; // start of the name when it contains ':', null otherwise
+     size_t prefix_length; // number of characters before the ':'
+
+     // intentionally not explicit: callers initialize the predicate directly from a name
+     namespace_uri_predicate(const char_t* name)
+     {
+         const char_t* colon = find_char(name, ':');
+
+         if (colon)
+         {
+             prefix = name;
+             prefix_length = static_cast<size_t>(colon - name);
+         }
+         else
+         {
+             prefix = 0;
+             prefix_length = 0;
+         }
+     }
+
+     bool operator()(xml_attribute a) const
+     {
+         const char_t* name = a.name();
+
+         if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
+
+         // no prefix: only the bare "xmlns" attribute matches
+         if (!prefix) return name[5] == 0;
+
+         // prefixed: the attribute has to be "xmlns:" followed by exactly the prefix
+         if (name[5] != ':') return false;
+
+         return strequalrange(name + 6, prefix, prefix_length);
+     }
+ };
+
+ // Looks up the namespace URI for node's name by searching node and then its ancestors for a
+ // matching xmlns declaration; returns an empty string if none is found
+ PUGI__FN const char_t* namespace_uri(xml_node node)
+ {
+     namespace_uri_predicate pred = node.name();
+
+     for (xml_node scope = node; scope; scope = scope.parent())
+     {
+         xml_attribute declaration = scope.find_attribute(pred);
+
+         if (declaration) return declaration.value();
+     }
+
+     return PUGIXML_TEXT("");
+ }
+
+ // Looks up the namespace URI for attr's name by searching parent and then its ancestors for a
+ // matching xmlns declaration; returns an empty string if the name has no prefix or none is found
+ PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
+ {
+     namespace_uri_predicate pred = attr.name();
+
+     // Default namespace does not apply to attributes
+     if (!pred.prefix) return PUGIXML_TEXT("");
+
+     for (xml_node scope = parent; scope; scope = scope.parent())
+     {
+         xml_attribute declaration = scope.find_attribute(pred);
+
+         if (declaration) return declaration.value();
+     }
+
+     return PUGIXML_TEXT("");
+ }
+
+ // Dispatches to the attribute or node overload depending on what the xpath node holds
+ PUGI__FN const char_t* namespace_uri(const xpath_node& node)
+ {
+     if (node.attribute())
+         return namespace_uri(node.attribute(), node.parent());
+
+     return namespace_uri(node.node());
+ }
+
+ // In-place whitespace normalization: leading and trailing whitespace is removed and every
+ // inner whitespace run is collapsed into a single ' '
+ PUGI__FN void normalize_space(char_t* buffer)
+ {
+     char_t* out = buffer;
+     char_t* in = buffer;
+
+     while (*in)
+     {
+         char_t ch = *in++;
+
+         if (!PUGI__IS_CHARTYPE(ch, ct_space))
+         {
+             *out++ = ch;
+             continue;
+         }
+
+         // swallow the rest of the whitespace run
+         while (PUGI__IS_CHARTYPE(*in, ct_space)) in++;
+
+         // a run at the very beginning produces nothing; any other run produces one space
+         if (out != buffer) *out++ = ' ';
+     }
+
+     // remove trailing space
+     if (out != buffer && PUGI__IS_CHARTYPE(out[-1], ct_space)) out--;
+
+     // zero-terminate
+     *out = 0;
+ }
+
+ // In-place character translation: a character found in 'from' at index i is replaced by to[i]
+ // when i < to_length and removed otherwise; characters not present in 'from' are kept
+ PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
+ {
+     char_t* write = buffer;
+
+     for (char_t* read = buffer; *read; ++read)
+     {
+         PUGI__DMC_VOLATILE char_t ch = *read;
+
+         const char_t* pos = find_char(from, ch);
+
+         if (!pos)
+         {
+             *write++ = ch; // do not process
+             continue;
+         }
+
+         const size_t index = static_cast<size_t>(pos - from);
+
+         if (index < to_length)
+             *write++ = to[index]; // replace
+     }
+
+     // zero-terminate
+     *write = 0;
+ }
+
+ // Builds a 128-entry lookup table equivalent to translating with (from, to).
+ // table[c] is the replacement for character c, with 128 meaning "remove this character".
+ // Returns null if either string contains a character >= 128 or if the allocation fails.
+ PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
+ {
+     unsigned char table[128] = {0};
+
+     for (; *from; ++from)
+     {
+         const unsigned int source = static_cast<unsigned int>(*from);
+         const unsigned int target = static_cast<unsigned int>(*to);
+
+         if (source >= 128 || target >= 128)
+             return 0;
+
+         // only the first mapping of a character counts; code=128 means "skip character"
+         if (!table[source])
+             table[source] = static_cast<unsigned char>(target ? target : 128);
+
+         // 'to' stops advancing once its terminator is reached
+         if (target) to++;
+     }
+
+     // every character without a mapping translates to itself
+     for (int code = 0; code < 128; ++code)
+     {
+         if (!table[code])
+             table[code] = static_cast<unsigned char>(code);
+     }
+
+     void* storage = alloc->allocate_nothrow(sizeof(table));
+     if (!storage) return 0;
+
+     memcpy(storage, table, sizeof(table));
+
+     return static_cast<unsigned char*>(storage);
+ }
+
+ // In-place translation driven by a 128-entry table (see translate_table_generate);
+ // characters >= 128 are copied through untouched
+ PUGI__FN void translate_table(char_t* buffer, const unsigned char* table)
+ {
+     char_t* out = buffer;
+
+     for (char_t* in = buffer; *in; ++in)
+     {
+         const char_t ch = *in;
+         const unsigned int index = static_cast<unsigned int>(ch);
+
+         if (index >= 128)
+         {
+             *out++ = ch;
+             continue;
+         }
+
+         const unsigned char code = table[index];
+
+         // code=128 means "skip character" (table size is 128 so 128 can be a special value)
+         // the store always happens, but the cursor only advances when bit 7 of code is clear,
+         // which skips these characters without extra branches
+         *out = static_cast<char_t>(code);
+         out += 1 - (code >> 7);
+     }
+
+     // zero-terminate
+     *out = 0;
+ }
+
+ // Returns false for namespace declarations ("xmlns" and "xmlns:..."), true for every other attribute name
+ inline bool is_xpath_attribute(const char_t* name)
+ {
+     if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return true;
+
+     // names that merely begin with "xmlns" (followed by something other than ':' or the end) still count
+     return name[5] != 0 && name[5] != ':';
+ }
+
+ // Boolean XPath variable; initial value is false
+ struct xpath_variable_boolean: xpath_variable
+ {
+     bool value;
+     char_t name[1]; // start of the variable name; new_xpath_variable allocates room for the rest
+
+     xpath_variable_boolean(): value(false)
+     {
+     }
+ };
+
+ // Numeric XPath variable; initial value is 0
+ struct xpath_variable_number: xpath_variable
+ {
+     double value;
+     char_t name[1]; // start of the variable name; new_xpath_variable allocates room for the rest
+
+     xpath_variable_number(): value(0)
+     {
+     }
+ };
+
+ // String XPath variable; the value starts out null and is released through xml_memory on destruction
+ struct xpath_variable_string: xpath_variable
+ {
+     char_t* value;
+     char_t name[1]; // start of the variable name; new_xpath_variable allocates room for the rest
+
+     xpath_variable_string(): value(0)
+     {
+     }
+
+     ~xpath_variable_string()
+     {
+         if (!value) return;
+
+         xml_memory::deallocate(value);
+     }
+ };
+
+ // Node set XPath variable; the value is default-constructed
+ struct xpath_variable_node_set: xpath_variable
+ {
+ xpath_node_set value;
+ // start of the variable name; new_xpath_variable allocates room for the remaining characters
+ char_t name[1];
+ };
+
+ // Shared default-constructed node set; presumably handed out where a node set reference is
+ // needed but no real one exists - its users are not visible in this part of the file
+ static const xpath_node_set dummy_node_set;
+
+ // Hashes a zero-terminated string with the Jenkins one-at-a-time hash
+ // (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
+ PUGI__FN unsigned int hash_string(const char_t* str)
+ {
+     unsigned int hash = 0;
+
+     // mixing step, once per character
+     for (const char_t* it = str; *it; ++it)
+     {
+         hash += static_cast<unsigned int>(*it);
+         hash += hash << 10;
+         hash ^= hash >> 6;
+     }
+
+     // final avalanche
+     hash += hash << 3;
+     hash ^= hash >> 11;
+     hash += hash << 15;
+
+     return hash;
+ }
+
+ // Allocates and constructs a variable of type T whose trailing name array holds a copy of name.
+ // Returns null for an empty name or if the allocation fails.
+ template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
+ {
+     // empty variable names are invalid
+     const size_t name_length = strlength(name);
+     if (name_length == 0) return 0;
+
+     // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
+     // (the one-element name array inside T already accounts for the terminator)
+     void* storage = xml_memory::allocate(sizeof(T) + name_length * sizeof(char_t));
+     if (!storage) return 0;
+
+     T* variable = new (storage) T();
+
+     // copy the name together with its zero terminator
+     memcpy(variable->name, name, (name_length + 1) * sizeof(char_t));
+
+     return variable;
+ }
+
+ // Creates a variable of the concrete class that corresponds to type; unknown types yield null
+ PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
+ {
+     if (type == xpath_type_node_set)
+         return new_xpath_variable<xpath_variable_node_set>(name);
+
+     if (type == xpath_type_number)
+         return new_xpath_variable<xpath_variable_number>(name);
+
+     if (type == xpath_type_string)
+         return new_xpath_variable<xpath_variable_string>(name);
+
+     if (type == xpath_type_boolean)
+         return new_xpath_variable<xpath_variable_boolean>(name);
+
+     return 0;
+ }
+
+ // Counterpart of new_xpath_variable<T>: runs the destructor explicitly (the object was
+ // created with placement new) and then returns the storage to xml_memory
+ template <typename T> PUGI__FN void delete_xpath_variable(T* var)
+ {
+ var->~T();
+ xml_memory::deallocate(var);
+ }
+
+ // Destroys var as the concrete class that corresponds to type; an unknown type trips an assertion
+ PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
+ {
+     if (type == xpath_type_node_set)
+     {
+         delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
+         return;
+     }
+
+     if (type == xpath_type_number)
+     {
+         delete_xpath_variable(static_cast<xpath_variable_number*>(var));
+         return;
+     }
+
+     if (type == xpath_type_string)
+     {
+         delete_xpath_variable(static_cast<xpath_variable_string*>(var));
+         return;
+     }
+
+     if (type == xpath_type_boolean)
+     {
+         delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
+         return;
+     }
+
+     assert(!"Invalid variable type");
+ }
+
+ // Looks up the variable named by the character range [begin, end) in set.
+ // The name is first copied into a zero-terminated scratch area: the caller's buffer when it
+ // fits, a temporary xml_memory allocation otherwise. Returns null if that allocation fails;
+ // otherwise returns whatever set->get() returns.
+ PUGI__FN xpath_variable* get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end)
+ {
+     const size_t length = static_cast<size_t>(end - begin);
+     const size_t buffer_capacity = sizeof(buffer) / sizeof(buffer[0]);
+
+     // the terminator needs one slot as well, hence >=
+     const bool needs_heap = length >= buffer_capacity;
+
+     char_t* scratch = needs_heap ? static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))) : buffer;
+     if (!scratch) return 0;
+
+     // copy string to zero-terminated buffer and perform lookup
+     memcpy(scratch, begin, length * sizeof(char_t));
+     scratch[length] = 0;
+
+     xpath_variable* found = set->get(scratch);
+
+     // release the temporary copy, if one was made
+     if (needs_heap) xml_memory::deallocate(scratch);
+
+     return found;
+ }
+PUGI__NS_END
+
+// Internal node set class
+PUGI__NS_BEGIN
+ // Classifies [begin, end) as sorted, reverse sorted or unsorted with respect to document order.
+ // Ranges with fewer than two elements are reported as sorted.
+ PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
+ {
+     if (end - begin < 2)
+         return xpath_node_set::type_sorted;
+
+     const size_t count = static_cast<size_t>(end - begin);
+
+     document_order_comparator cmp;
+
+     // the first adjacent pair sets the direction every other adjacent pair has to agree with
+     const bool ascending = cmp(begin[0], begin[1]);
+
+     for (size_t i = 1; i + 1 < count; ++i)
+     {
+         if (cmp(begin[i], begin[i + 1]) != ascending)
+             return xpath_node_set::type_unsorted;
+     }
+
+     return ascending ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
+ }
+
+ // Brings [begin, end) into document order (rev == false) or reverse document order (rev == true).
+ // 'type' describes the current order of the range; the resulting order is returned.
+ PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
+ {
+     const xpath_node_set::type_t wanted = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
+
+     if (type == xpath_node_set::type_unsorted)
+     {
+         // the range may turn out to be ordered already, which avoids the sort
+         type = xpath_get_order(begin, end);
+
+         if (type == xpath_node_set::type_unsorted)
+         {
+             sort(begin, end, document_order_comparator());
+
+             type = xpath_node_set::type_sorted;
+         }
+     }
+
+     // the range is ordered now, possibly in the opposite direction
+     if (type != wanted) reverse(begin, end);
+
+     return wanted;
+ }
+
+ // Returns the node of [begin, end) that comes first in document order, given the order the
+ // range is stored in; an empty range yields a default xpath_node
+ PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
+ {
+     if (begin == end) return xpath_node();
+
+     if (type == xpath_node_set::type_sorted)
+         return *begin;
+
+     if (type == xpath_node_set::type_sorted_reverse)
+         return end[-1];
+
+     if (type == xpath_node_set::type_unsorted)
+         return *min_element(begin, end, document_order_comparator());
+
+     assert(!"Invalid node set type");
+     return xpath_node();
+ }
+
+ // Growable array of xpath nodes whose storage comes from an xpath_allocator.
+ // [_begin, _end) holds the elements, [_begin, _eos) is the reserved storage.
+ class xpath_node_set_raw
+ {
+     xpath_node_set::type_t _type;
+
+     xpath_node* _begin;
+     xpath_node* _end;
+     xpath_node* _eos;
+
+ public:
+     // starts empty, without storage, and flagged as unsorted
+     xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
+     {
+     }
+
+     xpath_node* begin() const
+     {
+         return _begin;
+     }
+
+     xpath_node* end() const
+     {
+         return _end;
+     }
+
+     bool empty() const
+     {
+         return _end == _begin;
+     }
+
+     size_t size() const
+     {
+         return static_cast<size_t>(_end - _begin);
+     }
+
+     // node that comes first in document order (see xpath_first)
+     xpath_node first() const
+     {
+         return xpath_first(_begin, _end, _type);
+     }
+
+     // out-of-line slow path of push_back, taken when the storage is full
+     void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
+
+     void push_back(const xpath_node& node, xpath_allocator* alloc)
+     {
+         if (_end == _eos)
+         {
+             push_back_grow(node, alloc);
+             return;
+         }
+
+         *_end++ = node;
+     }
+
+     // appends the range [begin_, end_), growing the storage to exactly the required size if needed
+     void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
+     {
+         if (begin_ == end_) return;
+
+         const size_t used = static_cast<size_t>(_end - _begin);
+         const size_t capacity = static_cast<size_t>(_eos - _begin);
+         const size_t count = static_cast<size_t>(end_ - begin_);
+         const size_t required = used + count;
+
+         if (required > capacity)
+         {
+             // reallocate the old array or allocate a new one
+             xpath_node* storage = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), required * sizeof(xpath_node)));
+             assert(storage);
+
+             _begin = storage;
+             _end = storage + used;
+             _eos = storage + required;
+         }
+
+         memcpy(_end, begin_, count * sizeof(xpath_node));
+         _end += count;
+     }
+
+     // sorts the set into document order and records the new order
+     void sort_do()
+     {
+         _type = xpath_sort(_begin, _end, _type, false);
+     }
+
+     // drops every element from pos onwards; pos must lie within [_begin, _end]
+     void truncate(xpath_node* pos)
+     {
+         assert(_begin <= pos && pos <= _end);
+
+         _end = pos;
+     }
+
+     // removes adjacent equal elements; an unsorted set is first sorted with duplicate_comparator
+     // so that equal elements become adjacent
+     void remove_duplicates()
+     {
+         const bool needs_grouping = (_type == xpath_node_set::type_unsorted);
+
+         if (needs_grouping)
+             sort(_begin, _end, duplicate_comparator());
+
+         _end = unique(_begin, _end);
+     }
+
+     xpath_node_set::type_t type() const
+     {
+         return _type;
+     }
+
+     void set_type(xpath_node_set::type_t value)
+     {
+         _type = value;
+     }
+ };
+
+ // Slow path of push_back: enlarges the storage and then stores node.
+ // Written for the case push_back uses it in, i.e. the storage is completely full
+ // (the new end is placed at the old capacity).
+ PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
+ {
+     const size_t old_capacity = static_cast<size_t>(_eos - _begin);
+
+     // get new capacity (1.5x rule); the +1 makes an empty set grow as well
+     const size_t grown_capacity = old_capacity + old_capacity / 2 + 1;
+
+     // reallocate the old array or allocate a new one
+     xpath_node* storage = static_cast<xpath_node*>(alloc->reallocate(_begin, old_capacity * sizeof(xpath_node), grown_capacity * sizeof(xpath_node)));
+     assert(storage);
+
+     _begin = storage;
+     _end = storage + old_capacity;
+     _eos = storage + grown_capacity;
+
+     // there is room for the new element now
+     *_end++ = node;
+ }
+PUGI__NS_END
+
+PUGI__NS_BEGIN
+ // Evaluation context: a node together with a position and a size value
+ struct xpath_context
+ {
+     xpath_node n;
+     size_t position;
+     size_t size;
+
+     xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
+     {
+     }
+ };
+
+ // Token kinds produced by xpath_lexer
+ enum lexeme_t
+ {
+ lex_none = 0, // unrecognized or malformed input
+ lex_equal, // =
+ lex_not_equal, // !=
+ lex_less, // <
+ lex_greater, // >
+ lex_less_or_equal, // <=
+ lex_greater_or_equal, // >=
+ lex_plus, // +
+ lex_minus, // -
+ lex_multiply, // *
+ lex_union, // |
+ lex_var_ref, // $name or $prefix:name; contents hold the name without '$'
+ lex_open_brace, // (
+ lex_close_brace, // )
+ lex_quoted_string, // '...' or "..."; contents hold the text between the quotes
+ lex_number, // digits with optional fraction, or '.' followed by digits
+ lex_slash, // /
+ lex_double_slash, // //
+ lex_open_square_brace, // [
+ lex_close_square_brace, // ]
+ lex_string, // name, prefix:name or prefix:*
+ lex_comma, // ,
+ lex_axis_attribute, // @
+ lex_dot, // .
+ lex_double_dot, // ..
+ lex_double_colon, // ::
+ lex_eof // end of the query string
+ };
+
+ // Character range [begin, end) that refers to a piece of the query text; it is not zero-terminated
+ struct xpath_lexer_string
+ {
+     const char_t* begin;
+     const char_t* end;
+
+     xpath_lexer_string(): begin(0), end(0)
+     {
+     }
+
+     // compares the range against a zero-terminated string
+     bool operator==(const char_t* other) const
+     {
+         return strequalrange(other, begin, static_cast<size_t>(end - begin));
+     }
+ };
+
+ // Tokenizer for XPath query strings.
+ // The lexer always holds one current lexeme; next() advances to the following one.
+ // When a lexeme cannot be recognized the current lexeme becomes lex_none.
+ class xpath_lexer
+ {
+ // position in the query right after the current lexeme
+ const char_t* _cur;
+ // position where the current lexeme starts (after skipped whitespace)
+ const char_t* _cur_lexeme_pos;
+ // text range of the current lexeme; only set for variable references, numbers, names and quoted strings
+ xpath_lexer_string _cur_lexeme_contents;
+
+ lexeme_t _cur_lexeme;
+
+ public:
+ // starts lexing at the beginning of query and immediately reads the first lexeme
+ explicit xpath_lexer(const char_t* query): _cur(query)
+ {
+ next();
+ }
+
+ // returns the position right after the current lexeme
+ const char_t* state() const
+ {
+ return _cur;
+ }
+
+ // reads the next lexeme, starting at the position right after the current one
+ void next()
+ {
+ const char_t* cur = _cur;
+
+ // skip whitespace in front of the lexeme
+ while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
+
+ // save lexeme position for error reporting
+ _cur_lexeme_pos = cur;
+
+ switch (*cur)
+ {
+ case 0:
+ // end of input; cur is not advanced, so every further call yields lex_eof again
+ _cur_lexeme = lex_eof;
+ break;
+
+ case '>':
+ if (*(cur+1) == '=')
+ {
+ cur += 2;
+ _cur_lexeme = lex_greater_or_equal;
+ }
+ else
+ {
+ cur += 1;
+ _cur_lexeme = lex_greater;
+ }
+ break;
+
+ case '<':
+ if (*(cur+1) == '=')
+ {
+ cur += 2;
+ _cur_lexeme = lex_less_or_equal;
+ }
+ else
+ {
+ cur += 1;
+ _cur_lexeme = lex_less;
+ }
+ break;
+
+ case '!':
+ // '!' is only recognized as part of '!='
+ if (*(cur+1) == '=')
+ {
+ cur += 2;
+ _cur_lexeme = lex_not_equal;
+ }
+ else
+ {
+ _cur_lexeme = lex_none;
+ }
+ break;
+
+ case '=':
+ cur += 1;
+ _cur_lexeme = lex_equal;
+
+ break;
+
+ case '+':
+ cur += 1;
+ _cur_lexeme = lex_plus;
+
+ break;
+
+ case '-':
+ cur += 1;
+ _cur_lexeme = lex_minus;
+
+ break;
+
+ case '*':
+ cur += 1;
+ _cur_lexeme = lex_multiply;
+
+ break;
+
+ case '|':
+ cur += 1;
+ _cur_lexeme = lex_union;
+
+ break;
+
+ case '$':
+ // variable reference: '$' followed by a name, optionally in prefix:name form
+ cur += 1;
+
+ if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
+ {
+ _cur_lexeme_contents.begin = cur;
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+
+ if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
+ {
+ cur++; // :
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+ }
+
+ _cur_lexeme_contents.end = cur;
+
+ _cur_lexeme = lex_var_ref;
+ }
+ else
+ {
+ _cur_lexeme = lex_none;
+ }
+
+ break;
+
+ case '(':
+ cur += 1;
+ _cur_lexeme = lex_open_brace;
+
+ break;
+
+ case ')':
+ cur += 1;
+ _cur_lexeme = lex_close_brace;
+
+ break;
+
+ case '[':
+ cur += 1;
+ _cur_lexeme = lex_open_square_brace;
+
+ break;
+
+ case ']':
+ cur += 1;
+ _cur_lexeme = lex_close_square_brace;
+
+ break;
+
+ case ',':
+ cur += 1;
+ _cur_lexeme = lex_comma;
+
+ break;
+
+ case '/':
+ if (*(cur+1) == '/')
+ {
+ cur += 2;
+ _cur_lexeme = lex_double_slash;
+ }
+ else
+ {
+ cur += 1;
+ _cur_lexeme = lex_slash;
+ }
+ break;
+
+ case '.':
+ // '..', a number that starts with the decimal point, or a single '.'
+ if (*(cur+1) == '.')
+ {
+ cur += 2;
+ _cur_lexeme = lex_double_dot;
+ }
+ else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
+ {
+ _cur_lexeme_contents.begin = cur; // .
+
+ ++cur;
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+
+ _cur_lexeme_contents.end = cur;
+
+ _cur_lexeme = lex_number;
+ }
+ else
+ {
+ cur += 1;
+ _cur_lexeme = lex_dot;
+ }
+ break;
+
+ case '@':
+ cur += 1;
+ _cur_lexeme = lex_axis_attribute;
+
+ break;
+
+ case '"':
+ case '\'':
+ {
+ // quoted string: runs up to the matching quote character, no escape sequences are handled
+ char_t terminator = *cur;
+
+ ++cur;
+
+ _cur_lexeme_contents.begin = cur;
+ while (*cur && *cur != terminator) cur++;
+ _cur_lexeme_contents.end = cur;
+
+ // reaching the end of input first means the closing quote is missing
+ if (!*cur)
+ _cur_lexeme = lex_none;
+ else
+ {
+ cur += 1;
+ _cur_lexeme = lex_quoted_string;
+ }
+
+ break;
+ }
+
+ case ':':
+ // ':' is only recognized as part of '::'
+ if (*(cur+1) == ':')
+ {
+ cur += 2;
+ _cur_lexeme = lex_double_colon;
+ }
+ else
+ {
+ _cur_lexeme = lex_none;
+ }
+ break;
+
+ default:
+ // numbers, names, or unrecognized input
+ if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
+ {
+ _cur_lexeme_contents.begin = cur;
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+
+ if (*cur == '.')
+ {
+ cur++;
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+ }
+
+ _cur_lexeme_contents.end = cur;
+
+ _cur_lexeme = lex_number;
+ }
+ else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
+ {
+ _cur_lexeme_contents.begin = cur;
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+
+ // a single ':' continues the name; anything else after ':' leaves the ':' unconsumed
+ if (cur[0] == ':')
+ {
+ if (cur[1] == '*') // namespace test ncname:*
+ {
+ cur += 2; // :*
+ }
+ else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
+ {
+ cur++; // :
+
+ while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+ }
+ }
+
+ _cur_lexeme_contents.end = cur;
+
+ _cur_lexeme = lex_string;
+ }
+ else
+ {
+ _cur_lexeme = lex_none;
+ }
+ }
+
+ // commit the new position
+ _cur = cur;
+ }
+
+ // kind of the current lexeme
+ lexeme_t current() const
+ {
+ return _cur_lexeme;
+ }
+
+ // start position of the current lexeme within the query
+ const char_t* current_pos() const
+ {
+ return _cur_lexeme_pos;
+ }
+
+ // text of the current lexeme; only valid for the lexeme kinds listed in the assertion
+ const xpath_lexer_string& contents() const
+ {
+ assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
+
+ return _cur_lexeme_contents;
+ }
+ };
+
+ enum ast_type_t // Node kinds of the XPath AST; xpath_ast_node stores one of these (cast to char) in _type.
+ {
+ ast_unknown,
+ ast_op_or, // left or right
+ ast_op_and, // left and right
+ ast_op_equal, // left = right
+ ast_op_not_equal, // left != right
+ ast_op_less, // left < right
+ ast_op_greater, // left > right
+ ast_op_less_or_equal, // left <= right
+ ast_op_greater_or_equal, // left >= right
+ ast_op_add, // left + right
+ ast_op_subtract, // left - right
+ ast_op_multiply, // left * right
+ ast_op_divide, // left / right
+ ast_op_mod, // left mod right (evaluated with fmod)
+ ast_op_negate, // -left
+ ast_op_union, // left | right
+ ast_predicate, // apply predicate to set; next points to next predicate
+ ast_filter, // select * from left where right
+ ast_string_constant, // string constant
+ ast_number_constant, // number constant
+ ast_variable, // variable
+ ast_func_last, // last()
+ ast_func_position, // position()
+ ast_func_count, // count(left)
+ ast_func_id, // id(left)
+ ast_func_local_name_0, // local-name()
+ ast_func_local_name_1, // local-name(left)
+ ast_func_namespace_uri_0, // namespace-uri()
+ ast_func_namespace_uri_1, // namespace-uri(left)
+ ast_func_name_0, // name()
+ ast_func_name_1, // name(left)
+ ast_func_string_0, // string()
+ ast_func_string_1, // string(left)
+ ast_func_concat, // concat(left, right, siblings)
+ ast_func_starts_with, // starts-with(left, right)
+ ast_func_contains, // contains(left, right)
+ ast_func_substring_before, // substring-before(left, right)
+ ast_func_substring_after, // substring-after(left, right)
+ ast_func_substring_2, // substring(left, right)
+ ast_func_substring_3, // substring(left, right, third)
+ ast_func_string_length_0, // string-length()
+ ast_func_string_length_1, // string-length(left)
+ ast_func_normalize_space_0, // normalize-space()
+ ast_func_normalize_space_1, // normalize-space(left)
+ ast_func_translate, // translate(left, right, third)
+ ast_func_boolean, // boolean(left)
+ ast_func_not, // not(left)
+ ast_func_true, // true()
+ ast_func_false, // false()
+ ast_func_lang, // lang(left)
+ ast_func_number_0, // number()
+ ast_func_number_1, // number(left)
+ ast_func_sum, // sum(left)
+ ast_func_floor, // floor(left)
+ ast_func_ceiling, // ceiling(left)
+ ast_func_round, // round(left)
+ ast_step, // process set left with step
+ ast_step_root, // select root node
+
+ ast_opt_translate_table, // translate(left, right, third) where right/third are constants
+ ast_opt_compare_attribute // @name = 'string'
+ };
+
+ enum axis_t // XPath step axes; step_do treats ancestor, ancestor-or-self, preceding and preceding-sibling as reverse axes.
+ {
+ axis_ancestor, // reverse axis
+ axis_ancestor_or_self, // reverse axis
+ axis_attribute,
+ axis_child,
+ axis_descendant,
+ axis_descendant_or_self,
+ axis_following,
+ axis_following_sibling,
+ axis_namespace, // no case for this axis in step_fill (it would reach the "Unimplemented axis" assert)
+ axis_parent,
+ axis_preceding, // reverse axis
+ axis_preceding_sibling, // reverse axis
+ axis_self
+ };
+
+ enum nodetest_t // Node tests applied by step_push to each candidate node/attribute of a step.
+ {
+ nodetest_none, // matches nothing in step_push (the node overload asserts on it)
+ nodetest_name, // element (or attribute) whose name equals _data.nodetest
+ nodetest_type_node, // any node; for attributes, any attribute accepted by is_xpath_attribute
+ nodetest_type_comment, // node_comment nodes
+ nodetest_type_pi, // node_pi nodes
+ nodetest_type_text, // node_pcdata or node_cdata nodes
+ nodetest_pi, // node_pi nodes whose name equals _data.nodetest
+ nodetest_all, // any element; for attributes, any attribute accepted by is_xpath_attribute
+ nodetest_all_in_namespace // element/attribute whose name starts with _data.nodetest
+ };
+
+ enum predicate_t // Predicate classification stored in _test of ast_filter/ast_predicate nodes.
+ {
+ predicate_default, // evaluated per node as a number or boolean predicate (see apply_predicate)
+ predicate_posinv, // NOTE(review): name suggests a position-invariant predicate; not referenced in this part of the file - confirm at its use site
+ predicate_constant, // handled by apply_predicate_number_const: the expression is evaluated once and used as an index
+ predicate_constant_one // like predicate_constant; step_do additionally stops collecting after the first match when this is the only predicate
+ };
+
+ enum nodeset_eval_t // How much of a node set the caller needs; eval_once uses it to decide whether collection may stop after one node.
+ {
+ nodeset_eval_all, // the complete node set is required
+ nodeset_eval_any, // caller only checks for emptiness (e.g. node set to boolean conversion in eval_boolean)
+ nodeset_eval_first // caller only uses ns.first() (e.g. local-name(left), name(left), namespace-uri(left))
+ };
+
+ template <axis_t N> struct axis_to_type // Wraps an axis_t value in a type so that step_fill/step_do can be instantiated per axis and read it back as T::axis.
+ {
+ static const axis_t axis;
+ };
+
+ template <axis_t N> const axis_t axis_to_type<N>::axis = N; // out-of-class definition of the static member; its value is the template argument
+
+ class xpath_ast_node
+ {
+ private:
+ // node type
+ char _type; // holds an ast_type_t value (the constructors static_cast it to char)
+ char _rettype; // holds an xpath_value_type value
+
+ // for ast_step
+ char _axis; // holds an axis_t value; the non-step constructors set it to 0
+
+ // for ast_step/ast_predicate/ast_filter
+ char _test; // nodetest_t for ast_step, predicate_t for ast_filter/ast_predicate
+
+ // tree node structure
+ xpath_ast_node* _left;
+ xpath_ast_node* _right;
+ xpath_ast_node* _next; // next node in a sibling chain (next predicate, or next concat() argument)
+
+ union
+ {
+ // value for ast_string_constant
+ const char_t* string;
+ // value for ast_number_constant
+ double number;
+ // variable for ast_variable
+ xpath_variable* variable;
+ // node test for ast_step (node name/namespace/node type/pi target)
+ const char_t* nodetest;
+ // table for ast_opt_translate_table
+ const unsigned char* table;
+ } _data;
+
+ xpath_ast_node(const xpath_ast_node&); // NOTE(review): declared private with no definition in view - presumably to forbid copying; confirm
+ xpath_ast_node& operator=(const xpath_ast_node&);
+
+ template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
+ { // Evaluates lhs and rhs in context c and applies comp; the conversion applied to the operands depends on their static return types. Called by eval_boolean for = and !=.
+ xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
+ // Case 1: neither operand is a node set - compare as booleans if either is boolean, else as numbers if either is a number, else as strings.
+ if (lt != xpath_type_node_set && rt != xpath_type_node_set)
+ {
+ if (lt == xpath_type_boolean || rt == xpath_type_boolean)
+ return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
+ else if (lt == xpath_type_number || rt == xpath_type_number)
+ return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
+ else if (lt == xpath_type_string || rt == xpath_type_string)
+ {
+ xpath_allocator_capture cr(stack.result); // NOTE(review): presumably scopes allocations made on stack.result to this block - confirm against xpath_allocator_capture
+
+ xpath_string ls = lhs->eval_string(c, stack);
+ xpath_string rs = rhs->eval_string(c, stack);
+
+ return comp(ls, rs);
+ }
+ }
+ else if (lt == xpath_type_node_set && rt == xpath_type_node_set) // Case 2: both are node sets - true if comp holds for the string values of any (left node, right node) pair
+ {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
+
+ for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
+ for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
+ {
+ xpath_allocator_capture cri(stack.result);
+
+ if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
+ return true;
+ }
+
+ return false;
+ }
+ else // Case 3: exactly one operand is a node set
+ {
+ if (lt == xpath_type_node_set) // make lhs the non-node-set operand; this also swaps the argument order passed to comp
+ {
+ swap(lhs, rhs);
+ swap(lt, rt);
+ }
+
+ if (lt == xpath_type_boolean) // boolean vs node set: the node set is converted through eval_boolean
+ return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
+ else if (lt == xpath_type_number) // number vs node set: true if comp holds for the numeric value of any node
+ {
+ xpath_allocator_capture cr(stack.result);
+
+ double l = lhs->eval_number(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
+
+ for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
+ {
+ xpath_allocator_capture cri(stack.result);
+
+ if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
+ return true;
+ }
+
+ return false;
+ }
+ else if (lt == xpath_type_string) // string vs node set: true if comp holds for the string value of any node
+ {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_string l = lhs->eval_string(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
+
+ for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
+ {
+ xpath_allocator_capture cri(stack.result);
+
+ if (comp(l, string_value(*ri, stack.result)))
+ return true;
+ }
+
+ return false;
+ }
+ }
+ // Reached only when none of the type branches above returned.
+ assert(!"Wrong types");
+ return false;
+ }
+
+ // Tells whether node collection may stop after the first node.
+ // For a set of type_sorted every mode except nodeset_eval_all qualifies; for any other set type only nodeset_eval_any does.
+ static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
+ {
+     if (type == xpath_node_set::type_sorted)
+         return eval != nodeset_eval_all;
+
+     return eval == nodeset_eval_any;
+ }
+
+ template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
+ { // Relational comparison: every operand is compared as a number; for node sets the result is true if comp holds for any node (or any pair of nodes).
+ xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
+ // Neither operand is a node set: a single numeric comparison.
+ if (lt != xpath_type_node_set && rt != xpath_type_node_set)
+ return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
+ else if (lt == xpath_type_node_set && rt == xpath_type_node_set) // both node sets: compare the numeric values of all (left, right) pairs
+ {
+ xpath_allocator_capture cr(stack.result); // NOTE(review): presumably scopes allocations made on stack.result to this block - confirm against xpath_allocator_capture
+
+ xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
+
+ for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
+ {
+ xpath_allocator_capture cri(stack.result);
+ // the left value is converted once per left node and reused for the whole inner loop
+ double l = convert_string_to_number(string_value(*li, stack.result).c_str());
+
+ for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
+ {
+ xpath_allocator_capture crii(stack.result);
+
+ if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
+ return true;
+ }
+ }
+
+ return false;
+ }
+ else if (lt != xpath_type_node_set && rt == xpath_type_node_set) // scalar on the left, node set on the right
+ {
+ xpath_allocator_capture cr(stack.result);
+
+ double l = lhs->eval_number(c, stack);
+ xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
+
+ for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
+ {
+ xpath_allocator_capture cri(stack.result);
+
+ if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
+ return true;
+ }
+
+ return false;
+ }
+ else if (lt == xpath_type_node_set && rt != xpath_type_node_set) // node set on the left, scalar on the right
+ {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
+ double r = rhs->eval_number(c, stack);
+
+ for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
+ {
+ xpath_allocator_capture cri(stack.result);
+
+ if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
+ return true;
+ }
+
+ return false;
+ }
+ else // the four branches above cover every combination, so this is not expected to run
+ {
+ assert(!"Wrong types");
+ return false;
+ }
+ }
+
+ // Filters ns[first..end) in place with a non-numeric predicate expression.
+ // Each candidate is evaluated with a context of (node, 1-based position, candidate count); nodes for which
+ // expr is true are compacted to the front and the set is truncated after them.
+ // When once is set, scanning stops right after the first node that is kept.
+ static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
+ {
+     assert(ns.size() >= first);
+     assert(expr->rettype() != xpath_type_number);
+
+     size_t candidate_count = ns.size() - first;
+     xpath_node* write_pos = ns.begin() + first;
+     size_t position = 1;
+
+     for (xpath_node* read_pos = write_pos; read_pos != ns.end(); ++read_pos, ++position)
+     {
+         xpath_context ctx(*read_pos, position, candidate_count);
+
+         if (!expr->eval_boolean(ctx, stack)) continue;
+
+         *write_pos = *read_pos;
+         ++write_pos;
+
+         if (once) break;
+     }
+
+     ns.truncate(write_pos);
+ }
+
+ // Filters ns[first..end) in place with a numeric predicate expression.
+ // A node is kept when the expression, evaluated with that node as context, equals the node's 1-based position.
+ // Kept nodes are compacted to the front and the set is truncated after them.
+ // When once is set, scanning stops right after the first node that is kept.
+ static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
+ {
+     assert(ns.size() >= first);
+     assert(expr->rettype() == xpath_type_number);
+
+     size_t candidate_count = ns.size() - first;
+     xpath_node* write_pos = ns.begin() + first;
+     size_t position = 1;
+
+     for (xpath_node* read_pos = write_pos; read_pos != ns.end(); ++read_pos, ++position)
+     {
+         xpath_context ctx(*read_pos, position, candidate_count);
+
+         if (expr->eval_number(ctx, stack) == position)
+         {
+             *write_pos = *read_pos;
+             ++write_pos;
+
+             if (once) break;
+         }
+     }
+
+     ns.truncate(write_pos);
+ }
+
+ // Applies a numeric predicate whose value does not depend on the context node.
+ // The expression is evaluated once (with an empty node, position 1); if the result is a whole number within
+ // [1, candidate count], the node at that 1-based position becomes the only survivor of ns[first..end),
+ // otherwise every candidate is dropped.
+ static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
+ {
+     assert(ns.size() >= first);
+     assert(expr->rettype() == xpath_type_number);
+
+     size_t candidate_count = ns.size() - first;
+     xpath_node* candidates = ns.begin() + first;
+     xpath_node* new_end = candidates;
+
+     xpath_context ctx(xpath_node(), 1, candidate_count);
+     double wanted = expr->eval_number(ctx, stack);
+
+     if (wanted >= 1.0 && wanted <= candidate_count)
+     {
+         size_t wanted_index = static_cast<size_t>(wanted);
+
+         // only exact integers select a node
+         if (wanted == wanted_index)
+         {
+             xpath_node selected = candidates[wanted_index - 1];
+
+             *new_end = selected;
+             ++new_end;
+         }
+     }
+
+     ns.truncate(new_end);
+ }
+
+ // Applies this filter/predicate node (its expression is _right) to ns[first..end).
+ // Dispatches to the constant-index, numeric or boolean variant depending on _test and on the expression's return type.
+ void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
+ {
+     // nothing to filter
+     if (ns.size() == first) return;
+
+     assert(_type == ast_filter || _type == ast_predicate);
+
+     if (_test == predicate_constant || _test == predicate_constant_one)
+     {
+         apply_predicate_number_const(ns, first, _right, stack);
+         return;
+     }
+
+     if (_right->rettype() == xpath_type_number)
+     {
+         apply_predicate_number(ns, first, _right, stack, once);
+         return;
+     }
+
+     apply_predicate_boolean(ns, first, _right, stack, once);
+ }
+
+ // Runs the whole predicate chain (starting at _right, linked through _next) over ns[first..end).
+ // Only the final predicate of the chain is allowed to stop early, and only when eval_once permits it.
+ void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
+ {
+     if (ns.size() == first) return;
+
+     bool may_stop_early = eval_once(ns.type(), eval);
+
+     xpath_ast_node* predicate = _right;
+
+     while (predicate)
+     {
+         bool is_last_predicate = !predicate->_next;
+
+         predicate->apply_predicate(ns, first, stack, is_last_predicate && may_stop_early);
+
+         predicate = predicate->_next;
+     }
+ }
+
+ // Appends attribute a (owned by parent) to ns if it passes this step's node test.
+ // Returns true when the attribute was appended, false otherwise.
+ // Every accepting test additionally requires is_xpath_attribute(name); a null attribute name is treated as "".
+ bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
+ {
+     assert(a);
+
+     const char_t* name = a->name ? a->name : PUGIXML_TEXT("");
+
+     bool accepted = false;
+
+     switch (_test)
+     {
+     case nodetest_name:
+         accepted = strequal(name, _data.nodetest) && is_xpath_attribute(name);
+         break;
+
+     case nodetest_type_node:
+     case nodetest_all:
+         accepted = is_xpath_attribute(name);
+         break;
+
+     case nodetest_all_in_namespace:
+         accepted = starts_with(name, _data.nodetest) && is_xpath_attribute(name);
+         break;
+
+     default:
+         // the remaining node tests never match an attribute
+         break;
+     }
+
+     if (!accepted) return false;
+
+     ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
+     return true;
+ }
+
+ // Appends node n to ns if it passes this step's node test.
+ // Returns true when the node was appended, false otherwise.
+ bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
+ {
+     assert(n);
+
+     xml_node_type type = PUGI__NODETYPE(n);
+
+     bool accepted = false;
+
+     switch (_test)
+     {
+     case nodetest_name:
+         // element with exactly the requested name
+         accepted = type == node_element && n->name && strequal(n->name, _data.nodetest);
+         break;
+
+     case nodetest_type_node:
+         // node() accepts everything
+         accepted = true;
+         break;
+
+     case nodetest_type_comment:
+         accepted = type == node_comment;
+         break;
+
+     case nodetest_type_text:
+         accepted = type == node_pcdata || type == node_cdata;
+         break;
+
+     case nodetest_type_pi:
+         accepted = type == node_pi;
+         break;
+
+     case nodetest_pi:
+         // processing instruction with exactly the requested target
+         accepted = type == node_pi && n->name && strequal(n->name, _data.nodetest);
+         break;
+
+     case nodetest_all:
+         accepted = type == node_element;
+         break;
+
+     case nodetest_all_in_namespace:
+         // element whose name begins with the stored prefix
+         accepted = type == node_element && n->name && starts_with(n->name, _data.nodetest);
+         break;
+
+     default:
+         assert(!"Unknown axis");
+     }
+
+     if (!accepted) return false;
+
+     ns.push_back(xml_node(n), alloc);
+     return true;
+ }
+
+ template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
+ { // Walks axis T::axis starting from node n and offers every visited node/attribute to step_push; when once is set, returns as soon as one is accepted.
+ const axis_t axis = T::axis;
+ // Throughout: "step_push(...) & once" pushes first (step_push is always called) and then stops if a single result is enough.
+ switch (axis)
+ {
+ case axis_attribute:
+ {
+ for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
+ if (step_push(ns, a, n, alloc) & once)
+ return;
+
+ break;
+ }
+
+ case axis_child:
+ {
+ for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
+ if (step_push(ns, c, alloc) & once)
+ return;
+
+ break;
+ }
+
+ case axis_descendant:
+ case axis_descendant_or_self:
+ {
+ if (axis == axis_descendant_or_self)
+ if (step_push(ns, n, alloc) & once)
+ return;
+ // pre-order traversal of the subtree below n, without recursion
+ xml_node_struct* cur = n->first_child;
+
+ while (cur)
+ {
+ if (step_push(ns, cur, alloc) & once)
+ return;
+
+ if (cur->first_child)
+ cur = cur->first_child;
+ else
+ {
+ while (!cur->next_sibling) // climb until a node with a next sibling is found
+ {
+ cur = cur->parent;
+
+ if (cur == n) return; // back at the start node: the subtree is exhausted
+ }
+
+ cur = cur->next_sibling;
+ }
+ }
+
+ break;
+ }
+
+ case axis_following_sibling:
+ {
+ for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
+ if (step_push(ns, c, alloc) & once)
+ return;
+
+ break;
+ }
+
+ case axis_preceding_sibling:
+ { // NOTE(review): the loop condition implies prev_sibling_c is cyclic (the first child's prev is the last child, whose next_sibling is null) - confirm against xml_node_struct
+ for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
+ if (step_push(ns, c, alloc) & once)
+ return;
+
+ break;
+ }
+
+ case axis_following:
+ {
+ xml_node_struct* cur = n;
+
+ // exit from this node so that we don't include descendants
+ while (!cur->next_sibling)
+ {
+ cur = cur->parent;
+
+ if (!cur) return;
+ }
+
+ cur = cur->next_sibling;
+ // pre-order traversal of everything after that point, up to the end of the document
+ while (cur)
+ {
+ if (step_push(ns, cur, alloc) & once)
+ return;
+
+ if (cur->first_child)
+ cur = cur->first_child;
+ else
+ {
+ while (!cur->next_sibling)
+ {
+ cur = cur->parent;
+
+ if (!cur) return;
+ }
+
+ cur = cur->next_sibling;
+ }
+ }
+
+ break;
+ }
+
+ case axis_preceding:
+ {
+ xml_node_struct* cur = n;
+
+ // exit from this node so that we don't include descendants
+ while (!cur->prev_sibling_c->next_sibling)
+ {
+ cur = cur->parent;
+
+ if (!cur) return;
+ }
+
+ cur = cur->prev_sibling_c;
+ // walk backwards: descend to the last child first, push leaves, and push inner nodes on the way back up
+ while (cur)
+ {
+ if (cur->first_child)
+ cur = cur->first_child->prev_sibling_c;
+ else
+ {
+ // leaf node, can't be ancestor
+ if (step_push(ns, cur, alloc) & once)
+ return;
+
+ while (!cur->prev_sibling_c->next_sibling)
+ {
+ cur = cur->parent;
+
+ if (!cur) return;
+ // ancestors of n are skipped (not pushed) on this axis
+ if (!node_is_ancestor(cur, n))
+ if (step_push(ns, cur, alloc) & once)
+ return;
+ }
+
+ cur = cur->prev_sibling_c;
+ }
+ }
+
+ break;
+ }
+
+ case axis_ancestor:
+ case axis_ancestor_or_self:
+ {
+ if (axis == axis_ancestor_or_self)
+ if (step_push(ns, n, alloc) & once)
+ return;
+
+ xml_node_struct* cur = n->parent;
+
+ while (cur)
+ {
+ if (step_push(ns, cur, alloc) & once)
+ return;
+
+ cur = cur->parent;
+ }
+
+ break;
+ }
+
+ case axis_self:
+ {
+ step_push(ns, n, alloc);
+
+ break;
+ }
+
+ case axis_parent:
+ {
+ if (n->parent)
+ step_push(ns, n->parent, alloc);
+
+ break;
+ }
+
+ default: // axis_namespace has no case above and ends up here
+ assert(!"Unimplemented axis");
+ }
+ }
+
+ template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
+ { // Walks axis T::axis starting from attribute a, whose owning element is p; when once is set, returns as soon as one node is accepted.
+ const axis_t axis = T::axis;
+ // Only the axes listed below are handled; any other axis reaches the "Unimplemented axis" assert.
+ switch (axis)
+ {
+ case axis_ancestor:
+ case axis_ancestor_or_self:
+ {
+ if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
+ if (step_push(ns, a, p, alloc) & once)
+ return;
+ // the attribute's ancestors are its owning element and that element's ancestors
+ xml_node_struct* cur = p;
+
+ while (cur)
+ {
+ if (step_push(ns, cur, alloc) & once)
+ return;
+
+ cur = cur->parent;
+ }
+
+ break;
+ }
+
+ case axis_descendant_or_self:
+ case axis_self:
+ { // for both axes only the attribute itself is offered
+ if (_test == nodetest_type_node) // reject attributes based on principal node type test
+ step_push(ns, a, p, alloc);
+
+ break;
+ }
+
+ case axis_following:
+ {
+ xml_node_struct* cur = p;
+ // advance first, then push: p itself is not pushed, but its descendants and everything after them are
+ while (cur)
+ {
+ if (cur->first_child)
+ cur = cur->first_child;
+ else
+ {
+ while (!cur->next_sibling)
+ {
+ cur = cur->parent;
+
+ if (!cur) return;
+ }
+
+ cur = cur->next_sibling;
+ }
+
+ if (step_push(ns, cur, alloc) & once)
+ return;
+ }
+
+ break;
+ }
+
+ case axis_parent:
+ {
+ step_push(ns, p, alloc);
+
+ break;
+ }
+
+ case axis_preceding:
+ {
+ // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
+ step_fill(ns, p, alloc, once, v);
+ break;
+ }
+
+ default:
+ assert(!"Unimplemented axis");
+ }
+ }
+
+ // Entry point for filling ns from an xpath_node: forwards to the node overload when xn holds a node,
+ // or to the attribute overload when xn holds an attribute with a parent and the axis is one of those
+ // the attribute overload handles. In every other case nothing is added.
+ template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
+ {
+     if (xn.node())
+     {
+         step_fill(ns, xn.node().internal_object(), alloc, once, v);
+         return;
+     }
+
+     bool attribute_capable_axis = false;
+
+     switch (T::axis)
+     {
+     case axis_ancestor:
+     case axis_ancestor_or_self:
+     case axis_descendant_or_self:
+     case axis_following:
+     case axis_parent:
+     case axis_preceding:
+     case axis_self:
+         attribute_capable_axis = true;
+         break;
+
+     default:
+         break;
+     }
+
+     if (attribute_capable_axis && xn.attribute() && xn.parent())
+         step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
+ }
+
+ template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
+ { // Evaluates this step for axis T::axis: collects nodes from each node of _left (or from the context node when _left is null), applies the predicate chain in _right, and returns the resulting set.
+ const axis_t axis = T::axis;
+ const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
+ const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
+ // once: collection per source node may stop after the first accepted node when (a) it is an attribute lookup by name, (b) there are no predicates and eval_once allows it, or (c) the only predicate is predicate_constant_one.
+ bool once =
+ (axis == axis_attribute && _test == nodetest_name) ||
+ (!_right && eval_once(axis_type, eval)) ||
+ (_right && !_right->_next && _right->_test == predicate_constant_one);
+
+ xpath_node_set_raw ns;
+ ns.set_type(axis_type);
+
+ if (_left)
+ {
+ xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
+
+ // self axis preserves the original order
+ if (axis == axis_self) ns.set_type(s.type());
+
+ for (const xpath_node* it = s.begin(); it != s.end(); ++it)
+ {
+ size_t size = ns.size(); // nodes collected so far; predicates below only apply to what this iteration adds
+
+ // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
+ if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
+
+ step_fill(ns, *it, stack.result, once, v);
+ if (_right) apply_predicates(ns, size, stack, eval);
+ }
+ }
+ else
+ {
+ step_fill(ns, c.n, stack.result, once, v);
+ if (_right) apply_predicates(ns, 0, stack, eval);
+ }
+
+ // child, attribute and self axes always generate unique set of nodes
+ // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
+ if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
+ ns.remove_duplicates();
+
+ return ns;
+ }
+
+ public:
+ // Creates a string-constant node. The value pointer is stored as given; no copy of the text is made here.
+ xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value)
+     : _type(static_cast<char>(type))
+     , _rettype(static_cast<char>(rettype_))
+     , _axis(0)
+     , _test(0)
+     , _left(0)
+     , _right(0)
+     , _next(0)
+ {
+     assert(type == ast_string_constant);
+
+     _data.string = value;
+ }
+
+ // Creates a number-constant node holding value.
+ xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value)
+     : _type(static_cast<char>(type))
+     , _rettype(static_cast<char>(rettype_))
+     , _axis(0)
+     , _test(0)
+     , _left(0)
+     , _right(0)
+     , _next(0)
+ {
+     assert(type == ast_number_constant);
+
+     _data.number = value;
+ }
+
+ // Creates a variable-reference node; the variable pointer is stored, not the variable's value.
+ xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value)
+     : _type(static_cast<char>(type))
+     , _rettype(static_cast<char>(rettype_))
+     , _axis(0)
+     , _test(0)
+     , _left(0)
+     , _right(0)
+     , _next(0)
+ {
+     assert(type == ast_variable);
+
+     _data.variable = value;
+ }
+
+ // Creates a generic operator/function node with up to two children. _data is left unset by this constructor.
+ xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
+     : _type(static_cast<char>(type))
+     , _rettype(static_cast<char>(rettype_))
+     , _axis(0)
+     , _test(0)
+     , _left(left)
+     , _right(right)
+     , _next(0)
+ {
+ }
+
+ // Creates a step node: axis and node test are stored, contents becomes the node-test string,
+ // and the return type is always a node set. Predicates (_right) start out empty.
+ xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
+     : _type(static_cast<char>(type))
+     , _rettype(xpath_type_node_set)
+     , _axis(static_cast<char>(axis))
+     , _test(static_cast<char>(test))
+     , _left(left)
+     , _right(0)
+     , _next(0)
+ {
+     assert(type == ast_step);
+
+     _data.nodetest = contents;
+ }
+
+ // Creates a filter or predicate node; test (a predicate_t) is stored in _test and the return type is always a node set.
+ xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
+     : _type(static_cast<char>(type))
+     , _rettype(xpath_type_node_set)
+     , _axis(0)
+     , _test(static_cast<char>(test))
+     , _left(left)
+     , _right(right)
+     , _next(0)
+ {
+     assert(type == ast_filter || type == ast_predicate);
+ }
+
+ // Links value as the node following this one in its sibling chain.
+ void set_next(xpath_ast_node* value)
+ {
+     this->_next = value;
+ }
+
+ // Replaces the right child of this node with value.
+ void set_right(xpath_ast_node* value)
+ {
+     this->_right = value;
+ }
+
+ bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
+ { // Evaluates this node as a boolean in context c. Node kinds without a dedicated case are evaluated by their own return type and converted in the default branch.
+ switch (_type)
+ {
+ case ast_op_or:
+ return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
+
+ case ast_op_and:
+ return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
+
+ case ast_op_equal:
+ return compare_eq(_left, _right, c, stack, equal_to());
+
+ case ast_op_not_equal:
+ return compare_eq(_left, _right, c, stack, not_equal_to());
+
+ case ast_op_less:
+ return compare_rel(_left, _right, c, stack, less());
+
+ case ast_op_greater: // a > b is computed as b < a
+ return compare_rel(_right, _left, c, stack, less());
+
+ case ast_op_less_or_equal:
+ return compare_rel(_left, _right, c, stack, less_equal());
+
+ case ast_op_greater_or_equal: // a >= b is computed as b <= a
+ return compare_rel(_right, _left, c, stack, less_equal());
+
+ case ast_func_starts_with:
+ {
+ xpath_allocator_capture cr(stack.result); // NOTE(review): presumably scopes allocations made on stack.result to this block - confirm against xpath_allocator_capture
+
+ xpath_string lr = _left->eval_string(c, stack);
+ xpath_string rr = _right->eval_string(c, stack);
+
+ return starts_with(lr.c_str(), rr.c_str());
+ }
+
+ case ast_func_contains:
+ {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_string lr = _left->eval_string(c, stack);
+ xpath_string rr = _right->eval_string(c, stack);
+
+ return find_substring(lr.c_str(), rr.c_str()) != 0;
+ }
+
+ case ast_func_boolean:
+ return _left->eval_boolean(c, stack);
+
+ case ast_func_not:
+ return !_left->eval_boolean(c, stack);
+
+ case ast_func_true:
+ return true;
+
+ case ast_func_false:
+ return false;
+
+ case ast_func_lang:
+ {
+ if (c.n.attribute()) return false; // attribute context nodes never match
+
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_string lang = _left->eval_string(c, stack);
+ // look for the nearest xml:lang attribute on the context node or its ancestors
+ for (xml_node n = c.n.node(); n; n = n.parent())
+ {
+ xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
+
+ if (a)
+ {
+ const char_t* value = a.value();
+
+ // strnicmp / strncasecmp is not portable
+ for (const char_t* lit = lang.c_str(); *lit; ++lit)
+ {
+ if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
+ ++value;
+ }
+ // the argument matched as a prefix: accept an exact match or a match followed by '-' (a sub-language suffix)
+ return *value == 0 || *value == '-';
+ }
+ }
+
+ return false;
+ }
+
+ case ast_opt_compare_attribute:
+ { // optimized form of @name = 'string': _left carries the attribute name, _right is a string constant or a variable
+ const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
+
+ xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
+
+ return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
+ }
+
+ case ast_variable:
+ {
+ assert(_rettype == _data.variable->type());
+
+ if (_rettype == xpath_type_boolean)
+ return _data.variable->get_boolean();
+
+ // fallthrough to type conversion
+ }
+
+ default:
+ {
+ switch (_rettype)
+ {
+ case xpath_type_number:
+ return convert_number_to_boolean(eval_number(c, stack));
+
+ case xpath_type_string:
+ { // a string converts to true when it is non-empty
+ xpath_allocator_capture cr(stack.result);
+
+ return !eval_string(c, stack).empty();
+ }
+
+ case xpath_type_node_set:
+ { // a node set converts to true when it is non-empty; nodeset_eval_any lets evaluation stop early
+ xpath_allocator_capture cr(stack.result);
+
+ return !eval_node_set(c, stack, nodeset_eval_any).empty();
+ }
+
+ default:
+ assert(!"Wrong expression for return type boolean");
+ return false;
+ }
+ }
+ }
+ }
+
+ double eval_number(const xpath_context& c, const xpath_stack& stack)
+ { // Evaluates this node as a number in context c. Node kinds without a dedicated case are evaluated by their own return type and converted in the default branch.
+ switch (_type)
+ {
+ case ast_op_add:
+ return _left->eval_number(c, stack) + _right->eval_number(c, stack);
+
+ case ast_op_subtract:
+ return _left->eval_number(c, stack) - _right->eval_number(c, stack);
+
+ case ast_op_multiply:
+ return _left->eval_number(c, stack) * _right->eval_number(c, stack);
+
+ case ast_op_divide:
+ return _left->eval_number(c, stack) / _right->eval_number(c, stack);
+
+ case ast_op_mod:
+ return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
+
+ case ast_op_negate:
+ return -_left->eval_number(c, stack);
+
+ case ast_number_constant:
+ return _data.number;
+
+ case ast_func_last: // last() is the context size
+ return static_cast<double>(c.size);
+
+ case ast_func_position: // position() is the context position
+ return static_cast<double>(c.position);
+
+ case ast_func_count:
+ {
+ xpath_allocator_capture cr(stack.result); // NOTE(review): presumably scopes allocations made on stack.result to this block - confirm against xpath_allocator_capture
+
+ return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
+ }
+
+ case ast_func_string_length_0: // no argument: uses the string value of the context node
+ {
+ xpath_allocator_capture cr(stack.result);
+
+ return static_cast<double>(string_value(c.n, stack.result).length());
+ }
+
+ case ast_func_string_length_1:
+ {
+ xpath_allocator_capture cr(stack.result);
+
+ return static_cast<double>(_left->eval_string(c, stack).length());
+ }
+
+ case ast_func_number_0: // no argument: converts the string value of the context node
+ {
+ xpath_allocator_capture cr(stack.result);
+
+ return convert_string_to_number(string_value(c.n, stack.result).c_str());
+ }
+
+ case ast_func_number_1:
+ return _left->eval_number(c, stack);
+
+ case ast_func_sum:
+ { // adds up the numeric value of every node's string value
+ xpath_allocator_capture cr(stack.result);
+
+ double r = 0;
+
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
+
+ for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
+ {
+ xpath_allocator_capture cri(stack.result);
+
+ r += convert_string_to_number(string_value(*it, stack.result).c_str());
+ }
+
+ return r;
+ }
+
+ case ast_func_floor:
+ {
+ double r = _left->eval_number(c, stack);
+ // r == r is false only for NaN, which is returned unchanged without calling floor
+ return r == r ? floor(r) : r;
+ }
+
+ case ast_func_ceiling:
+ {
+ double r = _left->eval_number(c, stack);
+ // r == r is false only for NaN, which is returned unchanged without calling ceil
+ return r == r ? ceil(r) : r;
+ }
+
+ case ast_func_round:
+ return round_nearest_nzero(_left->eval_number(c, stack));
+
+ case ast_variable:
+ {
+ assert(_rettype == _data.variable->type());
+
+ if (_rettype == xpath_type_number)
+ return _data.variable->get_number();
+
+ // fallthrough to type conversion
+ }
+
+ default:
+ {
+ switch (_rettype)
+ {
+ case xpath_type_boolean: // true converts to 1, false to 0
+ return eval_boolean(c, stack) ? 1 : 0;
+
+ case xpath_type_string:
+ {
+ xpath_allocator_capture cr(stack.result);
+
+ return convert_string_to_number(eval_string(c, stack).c_str());
+ }
+
+ case xpath_type_node_set:
+ { // a node set is converted by way of its string conversion (eval_string)
+ xpath_allocator_capture cr(stack.result);
+
+ return convert_string_to_number(eval_string(c, stack).c_str());
+ }
+
+ default:
+ assert(!"Wrong expression for return type number");
+ return 0;
+ }
+
+ }
+ }
+ }
+
+ xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
+ { // Implements concat(): evaluates _left and every node of the _right/_next chain as strings and returns their concatenation, allocated from stack.result.
+ assert(_type == ast_func_concat);
+
+ xpath_allocator_capture ct(stack.temp); // NOTE(review): presumably scopes allocations made on stack.temp to this function - confirm against xpath_allocator_capture
+
+ // count the string number
+ size_t count = 1; // starts at 1 to account for _left
+ for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
+
+ // gather all strings
+ xpath_string static_buffer[4];
+ xpath_string* buffer = static_buffer;
+
+ // allocate on-heap for large concats
+ if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
+ {
+ buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
+ assert(buffer);
+ }
+
+ // evaluate all strings to temporary stack
+ xpath_stack swapped_stack = {stack.temp, stack.result}; // the two allocators trade places, so the pieces are produced on stack.temp
+
+ buffer[0] = _left->eval_string(c, swapped_stack);
+
+ size_t pos = 1;
+ for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
+ assert(pos == count);
+
+ // get total length
+ size_t length = 0;
+ for (size_t i = 0; i < count; ++i) length += buffer[i].length();
+
+ // create final string
+ char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t))); // +1 for the terminating zero
+ assert(result);
+
+ char_t* ri = result;
+ // copy the pieces back to back, in argument order
+ for (size_t j = 0; j < count; ++j)
+ for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
+ *ri++ = *bi;
+
+ *ri = 0;
+
+ return xpath_string::from_heap_preallocated(result, ri);
+ }
+
+ xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
+ {
+ switch (_type)
+ {
+ case ast_string_constant:
+ return xpath_string::from_const(_data.string);
+
+ case ast_func_local_name_0:
+ {
+ xpath_node na = c.n;
+
+ return xpath_string::from_const(local_name(na));
+ }
+
+ case ast_func_local_name_1:
+ {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
+ xpath_node na = ns.first();
+
+ return xpath_string::from_const(local_name(na));
+ }
+
+ case ast_func_name_0:
+ {
+ xpath_node na = c.n;
+
+ return xpath_string::from_const(qualified_name(na));
+ }
+
+ case ast_func_name_1:
+ {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
+ xpath_node na = ns.first();
+
+ return xpath_string::from_const(qualified_name(na));
+ }
+
+ case ast_func_namespace_uri_0:
+ {
+ xpath_node na = c.n;
+
+ return xpath_string::from_const(namespace_uri(na));
+ }
+
+ case ast_func_namespace_uri_1:
+ {
+ xpath_allocator_capture cr(stack.result);
+
+ xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
+ xpath_node na = ns.first();
+
+ return xpath_string::from_const(namespace_uri(na));
+ }
+
+ case ast_func_string_0:
+ return string_value(c.n, stack.result);
+
+ case ast_func_string_1:
+ return _left->eval_string(c, stack);
+
+ case ast_func_concat:
+ return eval_string_concat(c, stack);
+
+ case ast_func_substring_before:
+ {
+ xpath_allocator_capture cr(stack.temp);
+
+ xpath_stack swapped_stack = {stack.temp, stack.result};
+
+ xpath_string s = _left->eval_string(c, swapped_stack);
+ xpath_string p = _right->eval_string(c, swapped_stack);
+
+ const char_t* pos = find_substring(s.c_str(), p.c_str());
+
+ return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
+ }
+
+ case ast_func_substring_after:
+ {
+ xpath_allocator_capture cr(stack.temp);
+
+ xpath_stack swapped_stack = {stack.temp, stack.result};
+
+ xpath_string s = _left->eval_string(c, swapped_stack);
+ xpath_string p = _right->eval_string(c, swapped_stack);
+
+ const char_t* pos = find_substring(s.c_str(), p.c_str());
+ if (!pos) return xpath_string();
+
+ const char_t* rbegin = pos + p.length();
+ const char_t* rend = s.c_str() + s.length();
+
+ return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
+ }
+
+ case ast_func_substring_2:
+ {
+ xpath_allocator_capture cr(stack.temp);
+
+ xpath_stack swapped_stack = {stack.temp, stack.result};
+
+ xpath_string s = _left->eval_string(c, swapped_stack);
+ size_t s_length = s.length();
+
+ double first = round_nearest(_right->eval_number(c, stack));
+
+ if (is_nan(first)) return xpath_string(); // NaN
+ else if (first >= s_length + 1) return xpath_string();
+
+ size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
+ assert(1 <= pos && pos <= s_length + 1);
+
+ const char_t* rbegin = s.c_str() + (pos - 1);
+ const char_t* rend = s.c_str() + s.length();
+
+ return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
+ }
+
+ case ast_func_substring_3:
+ {
+ xpath_allocator_capture cr(stack.temp);
+
+ xpath_stack swapped_stack = {stack.temp, stack.result};
+
+ xpath_string s = _left->eval_string(c, swapped_stack);
+ size_t s_length = s.length();
+
+ double first = round_nearest(_right->eval_number(c, stack));
+ double last = first + round_nearest(_right->_next->eval_number(c, stack));
+
+ if (is_nan(first) || is_nan(last)) return xpath_string();
+ else if (first >= s_length + 1) return xpath_string();
+ else if (first >= last) return xpath_string();
+ else if (last < 1) return xpath_string();
+
+ size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
+ size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
+
+ assert(1 <= pos && pos <= end && end <= s_length + 1);
+ const char_t* rbegin = s.c_str() + (pos - 1);
+ const char_t* rend = s.c_str() + (end - 1);
+
+ return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
+ }
+
+ case ast_func_normalize_space_0:
+ {
+ xpath_string s = string_value(c.n, stack.result);
+
+ normalize_space(s.data(stack.result));
+
+ return s;
+ }
+
+ case ast_func_normalize_space_1:
+ {
+ xpath_string s = _left->eval_string(c, stack);
+
+ normalize_space(s.data(stack.result));
+
+ return s;
+ }
+
+ case ast_func_translate:
+ {
+ xpath_allocator_capture cr(stack.temp);
+
+ xpath_stack swapped_stack = {stack.temp, stack.result};
+
+ xpath_string s = _left->eval_string(c, stack);
+ xpath_string from = _right->eval_string(c, swapped_stack);
+ xpath_string to = _right->_next->eval_string(c, swapped_stack);
+
+ translate(s.data(stack.result), from.c_str(), to.c_str(), to.length());
+
+ return s;
+ }
+
+ case ast_opt_translate_table:
+ {
+ xpath_string s = _left->eval_string(c, stack);
+
+ translate_table(s.data(stack.result), _data.table);
+
+ return s;
+ }
+
+ case ast_variable:
+ {
+ assert(_rettype == _data.variable->type());
+
+ if (_rettype == xpath_type_string)
+ return xpath_string::from_const(_data.variable->get_string());
+
+ // fallthrough to type conversion
+ }
+
+ default:
+ {
+ switch (_rettype)
+ {
+ case xpath_type_boolean:
+ return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
+
+ case xpath_type_number:
+ return convert_number_to_string(eval_number(c, stack), stack.result);
+
+ case xpath_type_node_set:
+ {
+ xpath_allocator_capture cr(stack.temp);
+
+ xpath_stack swapped_stack = {stack.temp, stack.result};
+
+ xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
+ return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
+ }
+
+ default:
+ assert(!"Wrong expression for return type string");
+ return xpath_string();
+ }
+ }
+ }
+ }
+
+ // Evaluates this AST node as a node set and returns the resulting raw set.
+ // c is the evaluation context; stack provides the result and temp allocators used for set storage;
+ // eval is forwarded to union operands, step_do and eval_once.
+ // Node types without a dedicated case assert and return an empty set.
+ xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
+ {
+ switch (_type)
+ {
+ case ast_op_union:
+ {
+ xpath_allocator_capture cr(stack.temp);
+
+ // the left operand is evaluated with the two allocators exchanged
+ // (assumes xpath_stack lists its result allocator first - TODO confirm against its declaration)
+ xpath_stack swapped_stack = {stack.temp, stack.result};
+
+ xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval);
+ xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);
+
+ // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
+ rs.set_type(xpath_node_set::type_unsorted);
+
+ // the right-hand set is extended with the left-hand nodes using stack.result, then deduplicated
+ rs.append(ls.begin(), ls.end(), stack.result);
+ rs.remove_duplicates();
+
+ return rs;
+ }
+
+ case ast_filter:
+ {
+ // a predicate classified as the constant 1 only needs the first node of the filtered set
+ xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
+
+ // either expression is a number or it contains position() call; sort by document order
+ if (_test != predicate_posinv) set.sort_do();
+
+ bool once = eval_once(set.type(), eval);
+
+ apply_predicate(set, 0, stack, once);
+
+ return set;
+ }
+
+ case ast_func_id:
+ // id() always evaluates to an empty set here
+ return xpath_node_set_raw();
+
+ case ast_step:
+ {
+ // dispatch to the step_do overload selected by the axis tag type
+ switch (_axis)
+ {
+ case axis_ancestor:
+ return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
+
+ case axis_ancestor_or_self:
+ return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
+
+ case axis_attribute:
+ return step_do(c, stack, eval, axis_to_type<axis_attribute>());
+
+ case axis_child:
+ return step_do(c, stack, eval, axis_to_type<axis_child>());
+
+ case axis_descendant:
+ return step_do(c, stack, eval, axis_to_type<axis_descendant>());
+
+ case axis_descendant_or_self:
+ return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
+
+ case axis_following:
+ return step_do(c, stack, eval, axis_to_type<axis_following>());
+
+ case axis_following_sibling:
+ return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
+
+ case axis_namespace:
+ // namespaced axis is not supported
+ return xpath_node_set_raw();
+
+ case axis_parent:
+ return step_do(c, stack, eval, axis_to_type<axis_parent>());
+
+ case axis_preceding:
+ return step_do(c, stack, eval, axis_to_type<axis_preceding>());
+
+ case axis_preceding_sibling:
+ return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
+
+ case axis_self:
+ return step_do(c, stack, eval, axis_to_type<axis_self>());
+
+ default:
+ assert(!"Unknown axis");
+ return xpath_node_set_raw();
+ }
+ }
+
+ case ast_step_root:
+ {
+ assert(!_right); // root step can't have any predicates
+
+ xpath_node_set_raw ns;
+
+ ns.set_type(xpath_node_set::type_sorted);
+
+ // node context: root of that node; attribute context: root of the attribute's parent;
+ // otherwise the set stays empty
+ if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
+ else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
+
+ return ns;
+ }
+
+ case ast_variable:
+ {
+ assert(_rettype == _data.variable->type());
+
+ if (_rettype == xpath_type_node_set)
+ {
+ const xpath_node_set& s = _data.variable->get_node_set();
+
+ // copy the variable's nodes into a fresh raw set allocated from stack.result
+ xpath_node_set_raw ns;
+
+ ns.set_type(s.type());
+ ns.append(s.begin(), s.end(), stack.result);
+
+ return ns;
+ }
+
+ // fallthrough to type conversion
+ // (intentional: a non-node-set variable reaches the default case below)
+ }
+
+ default:
+ assert(!"Wrong expression for return type node set");
+ return xpath_node_set_raw();
+ }
+ }
+
+ // Optimizes the subtree rooted at this node in place.
+ // _left, _right and _next are optimized first (recursively), then a series of local rewrites
+ // is applied to this node. alloc is only used to allocate the translate() lookup table.
+ // NOTE(review): filter/predicate nodes dereference _right without a null check, so this
+ // relies on such nodes always carrying a predicate expression - confirm against the parser.
+ void optimize(xpath_allocator* alloc)
+ {
+ if (_left) _left->optimize(alloc);
+ if (_right) _right->optimize(alloc);
+ if (_next) _next->optimize(alloc);
+
+ // Rewrite [position()=expr] with [expr]
+ // Note that this step has to go before classification to recognize [position()=1]
+ if ((_type == ast_filter || _type == ast_predicate) &&
+ _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
+ {
+ _right = _right->_right;
+ }
+
+ // Classify filter/predicate ops to perform various optimizations during evaluation
+ if (_type == ast_filter || _type == ast_predicate)
+ {
+ assert(_test == predicate_default);
+
+ // literal 1 gets its own class; other numeric constants, variables and last() share one;
+ // non-numeric position-invariant expressions form the third; anything else keeps predicate_default
+ if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
+ _test = predicate_constant_one;
+ else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
+ _test = predicate_constant;
+ else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
+ _test = predicate_posinv;
+ }
+
+ // Rewrite descendant-or-self::node()/child::foo with descendant::foo
+ // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
+ // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
+ // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
+ if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left &&
+ _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
+ is_posinv_step())
+ {
+ if (_axis == axis_child || _axis == axis_descendant)
+ _axis = axis_descendant;
+ else
+ _axis = axis_descendant_or_self;
+
+ // drop the intermediate descendant-or-self::node() step from the chain
+ _left = _left->_left;
+ }
+
+ // Use optimized lookup table implementation for translate() with constant arguments
+ if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
+ {
+ unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
+
+ // when no table is produced the node is left as a regular translate() call
+ if (table)
+ {
+ _type = ast_opt_translate_table;
+ _data.table = table;
+ }
+ }
+
+ // Use optimized path for @attr = 'value' or @attr = $value
+ if (_type == ast_op_equal &&
+ _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
+ (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
+ {
+ _type = ast_opt_compare_attribute;
+ }
+ }
+
+ // Reports whether this expression is treated as position-invariant.
+ // position() and last() are never invariant; constants, variables, steps, predicates and
+ // filters are always reported invariant; any other node is invariant only if its left
+ // child and every node on its right/_next chain are.
+ bool is_posinv_expr() const
+ {
+ 	if (_type == ast_func_position || _type == ast_func_last)
+ 		return false;
+
+ 	const bool always_invariant =
+ 		_type == ast_string_constant || _type == ast_number_constant || _type == ast_variable ||
+ 		_type == ast_step || _type == ast_step_root ||
+ 		_type == ast_predicate || _type == ast_filter;
+
+ 	if (always_invariant)
+ 		return true;
+
+ 	// generic node: check the left operand first, then walk the argument chain
+ 	if (_left && !_left->is_posinv_expr())
+ 		return false;
+
+ 	xpath_ast_node* arg = _right;
+
+ 	while (arg)
+ 	{
+ 		if (!arg->is_posinv_expr())
+ 			return false;
+
+ 		arg = arg->_next;
+ 	}
+
+ 	return true;
+ }
+
+ // Reports whether every predicate attached to this step has been classified as
+ // predicate_posinv. Must only be called on ast_step nodes; a step without predicates
+ // yields true.
+ bool is_posinv_step() const
+ {
+ 	assert(_type == ast_step);
+
+ 	xpath_ast_node* pred = _right;
+
+ 	while (pred)
+ 	{
+ 		assert(pred->_type == ast_predicate);
+
+ 		if (pred->_test != predicate_posinv)
+ 			return false;
+
+ 		pred = pred->_next;
+ 	}
+
+ 	return true;
+ }
+
+ // Returns the stored return type of this node converted to xpath_value_type.
+ xpath_value_type rettype() const
+ {
+ 	const xpath_value_type type = static_cast<xpath_value_type>(_rettype);
+
+ 	return type;
+ }
+ };
+
+ struct xpath_parser
+ {
+ xpath_allocator* _alloc;
+ xpath_lexer _lexer;
+
+ const char_t* _query;
+ xpath_variable_set* _variables;
+
+ xpath_parse_result* _result;
+
+ char_t _scratch[32];
+
+ #ifdef PUGIXML_NO_EXCEPTIONS
+ jmp_buf _error_handler;
+ #endif
+
+ // Records message and the current lexer offset (relative to the query start) in the
+ // parse result, then aborts parsing: via longjmp when PUGIXML_NO_EXCEPTIONS is defined,
+ // otherwise by throwing xpath_exception. Does not return to the caller.
+ void throw_error(const char* message)
+ {
+ 	xpath_parse_result& failure = *_result;
+
+ 	failure.error = message;
+ 	failure.offset = _lexer.current_pos() - _query;
+
+ #ifndef PUGIXML_NO_EXCEPTIONS
+ 	throw xpath_exception(failure);
+ #else
+ 	longjmp(_error_handler, 1);
+ #endif
+ }
+
+ // Reports an out-of-memory condition: throws std::bad_alloc when exceptions are
+ // available, otherwise goes through throw_error with an "Out of memory" message.
+ void throw_error_oom()
+ {
+ #ifndef PUGIXML_NO_EXCEPTIONS
+ 	throw std::bad_alloc();
+ #else
+ 	throw_error("Out of memory");
+ #endif
+ }
+
+ // Allocates raw storage for one xpath_ast_node from the parser's allocator.
+ // Allocation failure is reported through throw_error_oom.
+ void* alloc_node()
+ {
+ 	void* storage = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
+
+ 	if (storage)
+ 		return storage;
+
+ 	throw_error_oom();
+
+ 	return storage;
+ }
+
+ // Copies the [begin, end) range of value into a null-terminated buffer obtained from the
+ // parser's allocator and returns it. A value with a null begin pointer yields a null
+ // pointer; allocation failure is reported through throw_error_oom.
+ const char_t* alloc_string(const xpath_lexer_string& value)
+ {
+ 	if (!value.begin)
+ 		return 0;
+
+ 	const size_t char_count = static_cast<size_t>(value.end - value.begin);
+
+ 	char_t* copy = static_cast<char_t*>(_alloc->allocate_nothrow((char_count + 1) * sizeof(char_t)));
+
+ 	if (!copy) throw_error_oom();
+ 	assert(copy); // workaround for clang static analysis
+
+ 	memcpy(copy, value.begin, char_count * sizeof(char_t));
+ 	copy[char_count] = 0;
+
+ 	return copy;
+ }
+
+ // Builds a string-returning function node that takes either zero arguments (type0) or a
+ // single argument (type1). A supplied argument must be a node set, otherwise a parse
+ // error is raised. argc must not exceed 1.
+ xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
+ {
+ 	assert(argc <= 1);
+
+ 	if (argc == 1)
+ 	{
+ 		if (args[0]->rettype() != xpath_type_node_set)
+ 			throw_error("Function has to be applied to node set");
+ 	}
+
+ 	const ast_type_t selected = (argc != 0) ? type1 : type0;
+
+ 	return new (alloc_node()) xpath_ast_node(selected, xpath_type_string, args[0]);
+ }
+
+ // Maps a function name plus argument count to a newly allocated AST node.
+ // Dispatch is keyed on the first character of name, then on the full name and argc.
+ // Only args[0] and args[1] are handed to the node constructor; any further arguments are
+ // not touched here (the caller in this file links them with set_next).
+ // If no name/arity combination matches, a parse error is raised via throw_error.
+ xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
+ {
+ switch (name.begin[0])
+ {
+ case 'b':
+ if (name == PUGIXML_TEXT("boolean") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
+
+ break;
+
+ case 'c':
+ if (name == PUGIXML_TEXT("count") && argc == 1)
+ {
+ // count() is only defined for node set arguments
+ if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
+ return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
+ }
+ else if (name == PUGIXML_TEXT("contains") && argc == 2)
+ return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("concat") && argc >= 2)
+ return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
+
+ break;
+
+ case 'f':
+ if (name == PUGIXML_TEXT("false") && argc == 0)
+ return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
+ else if (name == PUGIXML_TEXT("floor") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
+
+ break;
+
+ case 'i':
+ if (name == PUGIXML_TEXT("id") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
+
+ break;
+
+ case 'l':
+ if (name == PUGIXML_TEXT("last") && argc == 0)
+ return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
+ else if (name == PUGIXML_TEXT("lang") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
+ else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
+ return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
+
+ break;
+
+ case 'n':
+ if (name == PUGIXML_TEXT("name") && argc <= 1)
+ return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
+ else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
+ return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
+ else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
+ return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("not") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
+ else if (name == PUGIXML_TEXT("number") && argc <= 1)
+ return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
+
+ break;
+
+ case 'p':
+ if (name == PUGIXML_TEXT("position") && argc == 0)
+ return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
+
+ break;
+
+ case 'r':
+ if (name == PUGIXML_TEXT("round") && argc == 1)
+ return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
+
+ break;
+
+ case 's':
+ if (name == PUGIXML_TEXT("string") && argc <= 1)
+ return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
+ else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
+ return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
+ else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
+ return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
+ return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
+ return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
+ return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("sum") && argc == 1)
+ {
+ // sum() is only defined for node set arguments
+ if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
+ return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
+ }
+
+ break;
+
+ case 't':
+ if (name == PUGIXML_TEXT("translate") && argc == 3)
+ return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
+ else if (name == PUGIXML_TEXT("true") && argc == 0)
+ return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
+
+ break;
+
+ default:
+ break;
+ }
+
+ // every successful match returned above; reaching this point means the call is invalid
+ throw_error("Unrecognized function or wrong parameter count");
+
+ return 0;
+ }
+
+ // Translates an axis name into the matching axis_t value.
+ // specified is set to true when name is a recognised axis; for any other name it is set
+ // to false and axis_child is returned.
+ axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
+ {
+ 	specified = true;
+
+ 	if (name == PUGIXML_TEXT("ancestor")) return axis_ancestor;
+ 	if (name == PUGIXML_TEXT("ancestor-or-self")) return axis_ancestor_or_self;
+ 	if (name == PUGIXML_TEXT("attribute")) return axis_attribute;
+ 	if (name == PUGIXML_TEXT("child")) return axis_child;
+ 	if (name == PUGIXML_TEXT("descendant")) return axis_descendant;
+ 	if (name == PUGIXML_TEXT("descendant-or-self")) return axis_descendant_or_self;
+ 	if (name == PUGIXML_TEXT("following")) return axis_following;
+ 	if (name == PUGIXML_TEXT("following-sibling")) return axis_following_sibling;
+ 	if (name == PUGIXML_TEXT("namespace")) return axis_namespace;
+ 	if (name == PUGIXML_TEXT("parent")) return axis_parent;
+ 	if (name == PUGIXML_TEXT("preceding")) return axis_preceding;
+ 	if (name == PUGIXML_TEXT("preceding-sibling")) return axis_preceding_sibling;
+ 	if (name == PUGIXML_TEXT("self")) return axis_self;
+
+ 	// not an axis name: report that and hand back the implied default axis
+ 	specified = false;
+
+ 	return axis_child;
+ }
+
+ // Translates a node type name (comment, node, processing-instruction, text) into the
+ // matching nodetest_t value; any other name yields nodetest_none.
+ nodetest_t parse_node_test_type(const xpath_lexer_string& name)
+ {
+ 	if (name == PUGIXML_TEXT("comment")) return nodetest_type_comment;
+ 	if (name == PUGIXML_TEXT("node")) return nodetest_type_node;
+ 	if (name == PUGIXML_TEXT("processing-instruction")) return nodetest_type_pi;
+ 	if (name == PUGIXML_TEXT("text")) return nodetest_type_text;
+
+ 	return nodetest_none;
+ }
+
+ // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
+ // Parses one primary expression starting at the current lexeme and returns its AST node.
+ // Any lexeme that cannot start a primary expression raises a parse error via throw_error.
+ xpath_ast_node* parse_primary_expression()
+ {
+ switch (_lexer.current())
+ {
+ case lex_var_ref:
+ {
+ xpath_lexer_string name = _lexer.contents();
+
+ if (!_variables)
+ throw_error("Unknown variable: variable set is not provided");
+
+ // look the name up in the supplied variable set, passing the parser's scratch buffer
+ xpath_variable* var = get_variable_scratch(_scratch, _variables, name.begin, name.end);
+
+ if (!var)
+ throw_error("Unknown variable: variable set does not contain the given name");
+
+ // the lexer is advanced only after the checks, so errors point at the variable reference
+ _lexer.next();
+
+ return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
+ }
+
+ case lex_open_brace:
+ {
+ _lexer.next();
+
+ // parenthesised expression: no node of its own, the inner expression is returned directly
+ xpath_ast_node* n = parse_expression();
+
+ if (_lexer.current() != lex_close_brace)
+ throw_error("Unmatched braces");
+
+ _lexer.next();
+
+ return n;
+ }
+
+ case lex_quoted_string:
+ {
+ // the literal is copied into allocator-owned, null-terminated storage
+ const char_t* value = alloc_string(_lexer.contents());
+
+ xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
+ _lexer.next();
+
+ return n;
+ }
+
+ case lex_number:
+ {
+ double value = 0;
+
+ // a false return from the conversion helper is reported as out of memory
+ if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
+ throw_error_oom();
+
+ xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
+ _lexer.next();
+
+ return n;
+ }
+
+ case lex_string:
+ {
+ // function call: the first two arguments go into args, later ones are chained with set_next
+ xpath_ast_node* args[2] = {0};
+ size_t argc = 0;
+
+ xpath_lexer_string function = _lexer.contents();
+ _lexer.next();
+
+ xpath_ast_node* last_arg = 0;
+
+ if (_lexer.current() != lex_open_brace)
+ throw_error("Unrecognized function call");
+ _lexer.next();
+
+ if (_lexer.current() != lex_close_brace)
+ args[argc++] = parse_expression();
+
+ while (_lexer.current() != lex_close_brace)
+ {
+ if (_lexer.current() != lex_comma)
+ throw_error("No comma between function arguments");
+ _lexer.next();
+
+ xpath_ast_node* n = parse_expression();
+
+ // argc is at least 1 here; the second argument fills args[1], the third and later
+ // ones are appended after the previously parsed argument
+ if (argc < 2) args[argc] = n;
+ else last_arg->set_next(n);
+
+ argc++;
+ last_arg = n;
+ }
+
+ _lexer.next();
+
+ return parse_function(function, argc, args);
+ }
+
+ default:
+ throw_error("Unrecognizable primary expression");
+
+ return 0;
+ }
+ }
+
+ // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
+ // Predicate ::= '[' PredicateExpr ']'
+ // PredicateExpr ::= Expr
+ // Parses a primary expression followed by zero or more bracketed predicates; each
+ // predicate wraps the expression parsed so far in an ast_filter node. Applying a
+ // predicate to something that is not a node set is a parse error.
+ xpath_ast_node* parse_filter_expression()
+ {
+ 	xpath_ast_node* filtered = parse_primary_expression();
+
+ 	for (;;)
+ 	{
+ 		if (_lexer.current() != lex_open_square_brace)
+ 			break;
+
+ 		_lexer.next();
+
+ 		xpath_ast_node* condition = parse_expression();
+
+ 		if (filtered->rettype() != xpath_type_node_set)
+ 			throw_error("Predicate has to be applied to node set");
+
+ 		filtered = new (alloc_node()) xpath_ast_node(ast_filter, filtered, condition, predicate_default);
+
+ 		if (_lexer.current() != lex_close_square_brace)
+ 			throw_error("Unmatched square brace");
+
+ 		_lexer.next();
+ 	}
+
+ 	return filtered;
+ }
+
+ // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
+ // AxisSpecifier ::= AxisName '::' | '@'?
+ // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
+ // NameTest ::= '*' | NCName ':' '*' | QName
+ // AbbreviatedStep ::= '.' | '..'
+ // Parses a single location step applied to set (which may be null) and returns the new
+ // ast_step node with its predicates attached. Malformed steps raise a parse error.
+ xpath_ast_node* parse_step(xpath_ast_node* set)
+ {
+ if (set && set->rettype() != xpath_type_node_set)
+ throw_error("Step has to be applied to node set");
+
+ bool axis_specified = false;
+ axis_t axis = axis_child; // implied child axis
+
+ if (_lexer.current() == lex_axis_attribute)
+ {
+ axis = axis_attribute;
+ axis_specified = true;
+
+ _lexer.next();
+ }
+ else if (_lexer.current() == lex_dot)
+ {
+ _lexer.next();
+
+ // '.' is built as self::node(); this path returns before any predicate is parsed
+ return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
+ }
+ else if (_lexer.current() == lex_double_dot)
+ {
+ _lexer.next();
+
+ // '..' is built as parent::node(); this path returns before any predicate is parsed
+ return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
+ }
+
+ nodetest_t nt_type = nodetest_none;
+ xpath_lexer_string nt_name;
+
+ if (_lexer.current() == lex_string)
+ {
+ // node name test
+ nt_name = _lexer.contents();
+ _lexer.next();
+
+ // was it an axis name?
+ if (_lexer.current() == lex_double_colon)
+ {
+ // parse axis name
+ if (axis_specified) throw_error("Two axis specifiers in one step");
+
+ axis = parse_axis_name(nt_name, axis_specified);
+
+ if (!axis_specified) throw_error("Unknown axis");
+
+ // read actual node test
+ _lexer.next();
+
+ if (_lexer.current() == lex_multiply)
+ {
+ nt_type = nodetest_all;
+ nt_name = xpath_lexer_string();
+ _lexer.next();
+ }
+ else if (_lexer.current() == lex_string)
+ {
+ nt_name = _lexer.contents();
+ _lexer.next();
+ }
+ else throw_error("Unrecognized node test");
+ }
+
+ // nt_type is still nodetest_none unless the axis was followed by '*'
+ if (nt_type == nodetest_none)
+ {
+ // node type test or processing-instruction
+ if (_lexer.current() == lex_open_brace)
+ {
+ _lexer.next();
+
+ if (_lexer.current() == lex_close_brace)
+ {
+ _lexer.next();
+
+ nt_type = parse_node_test_type(nt_name);
+
+ if (nt_type == nodetest_none) throw_error("Unrecognized node type");
+
+ // type tests carry no name
+ nt_name = xpath_lexer_string();
+ }
+ else if (nt_name == PUGIXML_TEXT("processing-instruction"))
+ {
+ if (_lexer.current() != lex_quoted_string)
+ throw_error("Only literals are allowed as arguments to processing-instruction()");
+
+ // the literal argument becomes the name stored with the step
+ nt_type = nodetest_pi;
+ nt_name = _lexer.contents();
+ _lexer.next();
+
+ if (_lexer.current() != lex_close_brace)
+ throw_error("Unmatched brace near processing-instruction()");
+ _lexer.next();
+ }
+ else
+ throw_error("Unmatched brace near node type test");
+
+ }
+ // QName or NCName:*
+ else
+ {
+ if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
+ {
+ nt_name.end--; // erase *
+
+ nt_type = nodetest_all_in_namespace;
+ }
+ else nt_type = nodetest_name;
+ }
+ }
+ }
+ else if (_lexer.current() == lex_multiply)
+ {
+ nt_type = nodetest_all;
+ _lexer.next();
+ }
+ else throw_error("Unrecognized node test");
+
+ // alloc_string returns null when nt_name was reset to an empty lexer string
+ xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
+
+ // tail of the predicate chain built so far (null until the first predicate is parsed)
+ xpath_ast_node* last = 0;
+
+ while (_lexer.current() == lex_open_square_brace)
+ {
+ _lexer.next();
+
+ xpath_ast_node* expr = parse_expression();
+
+ xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default);
+
+ if (_lexer.current() != lex_close_square_brace)
+ throw_error("Unmatched square brace");
+ _lexer.next();
+
+ // the first predicate hangs off the step's right link, later ones are chained via next
+ if (last) last->set_next(pred);
+ else n->set_right(pred);
+
+ last = pred;
+ }
+
+ return n;
+ }
+
+ // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
+ // Parses a chain of steps separated by '/' or '//' starting from set. Each '//' inserts
+ // an extra descendant-or-self::node() step before the step that follows it.
+ xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
+ {
+ 	xpath_ast_node* path = parse_step(set);
+
+ 	for (lexeme_t separator = _lexer.current();
+ 		separator == lex_slash || separator == lex_double_slash;
+ 		separator = _lexer.current())
+ 	{
+ 		_lexer.next();
+
+ 		if (separator == lex_double_slash)
+ 			path = new (alloc_node()) xpath_ast_node(ast_step, path, axis_descendant_or_self, nodetest_type_node, 0);
+
+ 		path = parse_step(path);
+ 	}
+
+ 	return path;
+ }
+
+ // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
+ // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
+ // Parses an absolute or relative location path. A leading '/' creates a root step that is
+ // returned as-is when no relative path follows; a leading '//' creates a root step plus a
+ // descendant-or-self::node() step; anything else is parsed as a relative path from null.
+ xpath_ast_node* parse_location_path()
+ {
+ 	const lexeme_t lead = _lexer.current();
+
+ 	if (lead == lex_slash)
+ 	{
+ 		_lexer.next();
+
+ 		xpath_ast_node* root = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
+
+ 		// a relative path can only begin with one of these lexemes; anything else means
+ 		// the query is just the standalone root path
+ 		const lexeme_t following = _lexer.current();
+
+ 		const bool starts_relative_path =
+ 			following == lex_string || following == lex_axis_attribute || following == lex_dot ||
+ 			following == lex_double_dot || following == lex_multiply;
+
+ 		if (!starts_relative_path)
+ 			return root;
+
+ 		return parse_relative_location_path(root);
+ 	}
+
+ 	if (lead == lex_double_slash)
+ 	{
+ 		_lexer.next();
+
+ 		xpath_ast_node* root = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
+ 		xpath_ast_node* descend = new (alloc_node()) xpath_ast_node(ast_step, root, axis_descendant_or_self, nodetest_type_node, 0);
+
+ 		return parse_relative_location_path(descend);
+ 	}
+
+ 	// kept as a plain trailing return (not an else branch) so that every path visibly ends in a return
+ 	return parse_relative_location_path(0);
+ }
+
+ // PathExpr ::= LocationPath
+ // | FilterExpr
+ // | FilterExpr '/' RelativeLocationPath
+ // | FilterExpr '//' RelativeLocationPath
+ // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
+ // UnaryExpr ::= UnionExpr | '-' UnaryExpr
+ // Parses a path expression or a unary minus and returns its AST node. The current lexeme
+ // decides between a filter expression (optionally followed by a relative path), a negation,
+ // and a plain location path.
+ xpath_ast_node* parse_path_or_unary_expression()
+ {
+ // Clarification.
+ // PathExpr begins with either LocationPath or FilterExpr.
+ // FilterExpr begins with PrimaryExpr
+ // PrimaryExpr begins with '$' in case of it being a variable reference,
+ // '(' in case of it being an expression, string literal, number constant or
+ // function call.
+
+ if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
+ _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
+ _lexer.current() == lex_string)
+ {
+ if (_lexer.current() == lex_string)
+ {
+ // This is either a function call, or not - if not, we shall proceed with location path
+ // (peek at the raw input after the name without advancing the lexer)
+ const char_t* state = _lexer.state();
+
+ while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
+
+ if (*state != '(') return parse_location_path();
+
+ // This looks like a function call; however this still can be a node-test. Check it.
+ if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
+ }
+
+ xpath_ast_node* n = parse_filter_expression();
+
+ if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
+ {
+ lexeme_t l = _lexer.current();
+ _lexer.next();
+
+ if (l == lex_double_slash)
+ {
+ // the node set check is done here for '//'; for '/' it happens inside parse_step
+ if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
+
+ n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
+ }
+
+ // select from location path
+ return parse_relative_location_path(n);
+ }
+
+ return n;
+ }
+ else if (_lexer.current() == lex_minus)
+ {
+ _lexer.next();
+
+ // precedence 7+ - only parses union expressions
+ xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7);
+
+ return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
+ }
+ else
+ return parse_location_path();
+ }
+
+ // Describes a binary operator recognised by the expression parser: the AST node type it
+ // produces, the value type of the result, and its precedence (a larger number binds
+ // tighter). A default-constructed value (ast_unknown, precedence 0) means "no operator".
+ struct binary_op_t
+ {
+ 	ast_type_t asttype;
+ 	xpath_value_type rettype;
+ 	int precedence;
+
+ 	binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
+ 	{
+ 	}
+
+ 	binary_op_t(ast_type_t type, xpath_value_type result, int level): asttype(type), rettype(result), precedence(level)
+ 	{
+ 	}
+
+ 	// Classifies the lexer's current lexeme as a binary operator without consuming it.
+ 	static binary_op_t parse(xpath_lexer& lexer)
+ 	{
+ 		const lexeme_t token = lexer.current();
+
+ 		// word operators arrive as plain name lexemes and are told apart by their text
+ 		if (token == lex_string)
+ 		{
+ 			const xpath_lexer_string& word = lexer.contents();
+
+ 			if (word == PUGIXML_TEXT("or")) return binary_op_t(ast_op_or, xpath_type_boolean, 1);
+ 			if (word == PUGIXML_TEXT("and")) return binary_op_t(ast_op_and, xpath_type_boolean, 2);
+ 			if (word == PUGIXML_TEXT("div")) return binary_op_t(ast_op_divide, xpath_type_number, 6);
+ 			if (word == PUGIXML_TEXT("mod")) return binary_op_t(ast_op_mod, xpath_type_number, 6);
+
+ 			return binary_op_t();
+ 		}
+
+ 		switch (token)
+ 		{
+ 		case lex_equal: return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
+ 		case lex_not_equal: return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
+
+ 		case lex_less: return binary_op_t(ast_op_less, xpath_type_boolean, 4);
+ 		case lex_greater: return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
+ 		case lex_less_or_equal: return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
+ 		case lex_greater_or_equal: return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
+
+ 		case lex_plus: return binary_op_t(ast_op_add, xpath_type_number, 5);
+ 		case lex_minus: return binary_op_t(ast_op_subtract, xpath_type_number, 5);
+
+ 		case lex_multiply: return binary_op_t(ast_op_multiply, xpath_type_number, 6);
+
+ 		case lex_union: return binary_op_t(ast_op_union, xpath_type_node_set, 7);
+
+ 		default: return binary_op_t();
+ 		}
+ 	}
+ };
+
+ // Parses the binary-operator tail of an expression whose first operand lhs has already
+ // been parsed. Operators with precedence >= limit are consumed and combined
+ // left-to-right; a following operator with higher precedence is bound to the right
+ // operand first through a recursive call. Returns the root of the combined expression.
+ xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
+ {
+ binary_op_t op = binary_op_t::parse(_lexer);
+
+ while (op.asttype != ast_unknown && op.precedence >= limit)
+ {
+ // consume the operator lexeme that binary_op_t::parse only inspected
+ _lexer.next();
+
+ xpath_ast_node* rhs = parse_path_or_unary_expression();
+
+ binary_op_t nextop = binary_op_t::parse(_lexer);
+
+ // tighter-binding operators to the right take rhs as their left operand
+ while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
+ {
+ rhs = parse_expression_rec(rhs, nextop.precedence);
+
+ nextop = binary_op_t::parse(_lexer);
+ }
+
+ if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
+ throw_error("Union operator has to be applied to node sets");
+
+ lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs);
+
+ op = binary_op_t::parse(_lexer);
+ }
+
+ return lhs;
+ }
+
+ // Expr ::= OrExpr
+ // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
+ // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
+ // EqualityExpr ::= RelationalExpr
+ // | EqualityExpr '=' RelationalExpr
+ // | EqualityExpr '!=' RelationalExpr
+ // RelationalExpr ::= AdditiveExpr
+ // | RelationalExpr '<' AdditiveExpr
+ // | RelationalExpr '>' AdditiveExpr
+ // | RelationalExpr '<=' AdditiveExpr
+ // | RelationalExpr '>=' AdditiveExpr
+ // AdditiveExpr ::= MultiplicativeExpr
+ // | AdditiveExpr '+' MultiplicativeExpr
+ // | AdditiveExpr '-' MultiplicativeExpr
+ // MultiplicativeExpr ::= UnaryExpr
+ // | MultiplicativeExpr '*' UnaryExpr
+ // | MultiplicativeExpr 'div' UnaryExpr
+ // | MultiplicativeExpr 'mod' UnaryExpr
+ // Parses a complete expression: the first operand is read here, then
+ // parse_expression_rec absorbs all binary operators (minimum precedence 0).
+ xpath_ast_node* parse_expression()
+ {
+     xpath_ast_node* first_operand = parse_path_or_unary_expression();
+
+     return parse_expression_rec(first_operand, 0);
+ }
+
+ // Stores the query text and the supplied variable set, allocator and result
+ // pointers; the lexer is constructed over the same query string.
+ xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result):
+     _alloc(alloc),
+     _lexer(query),
+     _query(query),
+     _variables(variables),
+     _result(result)
+ {
+ }
+
+ // Parses the whole query and returns the AST root.
+ // Reports "Incorrect query" if tokens remain after the expression.
+ xpath_ast_node* parse()
+ {
+     xpath_ast_node* root = parse_expression();
+
+     // a complete expression must be followed by end of input
+     if (_lexer.current() != lex_eof) throw_error("Incorrect query");
+
+     return root;
+ }
+
+ // Convenience entry point: builds a parser over the query and returns the parsed AST root.
+ // With PUGIXML_NO_EXCEPTIONS, a non-zero return from setjmp yields a null result instead;
+ // presumably throw_error jumps back to _error_handler - confirm against its definition.
+ static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
+ {
+ xpath_parser parser(query, variables, alloc, result);
+
+ #ifdef PUGIXML_NO_EXCEPTIONS
+ int error = setjmp(parser._error_handler);
+
+ return (error == 0) ? parser.parse() : 0;
+ #else
+ return parser.parse();
+ #endif
+ }
+ };
+
+ // Storage for a compiled XPath query: the AST root plus the allocator used for it.
+ // The allocator's first memory block is embedded in this object.
+ struct xpath_query_impl
+ {
+     // Allocates and constructs an instance; returns 0 if the allocation fails.
+     static xpath_query_impl* create()
+     {
+         void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
+
+         // placement new on a null pointer is undefined behavior, so report
+         // allocation failure explicitly (callers check the result for null)
+         if (!memory) return 0;
+
+         return new (memory) xpath_query_impl();
+     }
+
+     // Releases an instance obtained from create(); a null pointer is ignored.
+     static void destroy(void* ptr)
+     {
+         if (!ptr) return;
+
+         // free all allocated pages
+         static_cast<xpath_query_impl*>(ptr)->alloc.release();
+
+         // free allocator memory (with the first page)
+         xml_memory::deallocate(ptr);
+     }
+
+     // Starts with no AST and an allocator that points at the embedded block.
+     xpath_query_impl(): root(0), alloc(&block)
+     {
+         block.next = 0;
+         block.capacity = sizeof(block.data);
+     }
+
+     xpath_ast_node* root;
+     xpath_allocator alloc;
+     xpath_memory_block block;
+ };
+
+ // Evaluates the compiled query as a string, using n as the context node
+ // (the context is built with arguments 1, 1 - presumably position and size).
+ // Returns a default-constructed xpath_string when impl is null or, in
+ // PUGIXML_NO_EXCEPTIONS builds, when setjmp on sd.error_handler returns non-zero.
+ PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
+ {
+ if (!impl) return xpath_string();
+
+ #ifdef PUGIXML_NO_EXCEPTIONS
+ if (setjmp(sd.error_handler)) return xpath_string();
+ #endif
+
+ xpath_context c(n, 1, 1);
+
+ return impl->root->eval_string(c, sd.stack);
+ }
+
+ // Returns the query's AST root if the query exists and its static return type
+ // is a node set. A null impl yields 0. A query of another type yields 0 with
+ // PUGIXML_NO_EXCEPTIONS and throws xpath_exception otherwise.
+ PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
+ {
+     if (!impl) return 0;
+
+     if (impl->root->rettype() == xpath_type_node_set) return impl->root;
+
+ #ifdef PUGIXML_NO_EXCEPTIONS
+     return 0;
+ #else
+     xpath_parse_result res;
+     res.error = "Expression does not evaluate to node set";
+
+     throw xpath_exception(res);
+ #endif
+ }
+PUGI__NS_END
+
+namespace pugi
+{
+#ifndef PUGIXML_NO_EXCEPTIONS
+ // Wraps a copy of the parse result; the result is expected to carry an error message.
+ PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
+ {
+ assert(_result.error);
+ }
+
+ // Returns the error message stored in the wrapped parse result.
+ PUGI__FN const char* xpath_exception::what() const throw()
+ {
+ return _result.error;
+ }
+
+ // Returns the wrapped parse result.
+ PUGI__FN const xpath_parse_result& xpath_exception::result() const
+ {
+ return _result;
+ }
+#endif
+
+ // Constructs an xpath_node with default-constructed node and attribute handles.
+ PUGI__FN xpath_node::xpath_node()
+ {
+ }
+
+ // Constructs an xpath_node that refers to a node; the attribute handle is default-constructed.
+ PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
+ {
+ }
+
+ // Constructs an xpath_node that refers to an attribute.
+ // parent_ is stored only when attribute_ is non-empty; otherwise _node is a default xml_node.
+ PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
+ {
+ }
+
+ // Returns the stored node, or a default xml_node when this object refers to an attribute.
+ PUGI__FN xml_node xpath_node::node() const
+ {
+     if (_attribute) return xml_node();
+
+     return _node;
+ }
+
+ // Returns the stored attribute handle.
+ PUGI__FN xml_attribute xpath_node::attribute() const
+ {
+     return _attribute;
+ }
+
+ // For an attribute, _node holds the owning node and is returned directly;
+ // otherwise the parent of the stored node is returned.
+ PUGI__FN xml_node xpath_node::parent() const
+ {
+     if (_attribute) return _node;
+
+     return _node.parent();
+ }
+
+ // Dummy function whose address serves as the "true" value of the safe bool conversion.
+ PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
+ {
+ }
+
+ // Safe bool conversion: non-null when either the node or the attribute handle is set.
+ PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
+ {
+     if (_node || _attribute) return unspecified_bool_xpath_node;
+
+     return 0;
+ }
+
+ // True when both the node and the attribute handle are empty.
+ PUGI__FN bool xpath_node::operator!() const
+ {
+     return !_node && !_attribute;
+ }
+
+ // Two xpath_nodes are equal when both their node and attribute handles compare equal.
+ PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
+ {
+     if (_node == n._node) return _attribute == n._attribute;
+
+     return false;
+ }
+
+ // Two xpath_nodes differ when either their node or attribute handles differ.
+ PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
+ {
+     if (_node != n._node) return true;
+
+     return _attribute != n._attribute;
+ }
+
+#ifdef __BORLANDC__
+ // Borland-only overload: logical AND of an xpath_node (converted to bool) and a bool.
+ PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
+ {
+ return (bool)lhs && rhs;
+ }
+
+ // Borland-only overload: logical OR of an xpath_node (converted to bool) and a bool.
+ PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
+ {
+ return (bool)lhs || rhs;
+ }
+#endif
+
+ // Replaces the set's contents with a copy of [begin_, end_).
+ // Zero or one element is kept in the inline _storage member; larger ranges get a heap copy.
+ // If the heap allocation fails the set is left unchanged: the function returns silently
+ // with PUGIXML_NO_EXCEPTIONS and throws std::bad_alloc otherwise.
+ PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_)
+ {
+ assert(begin_ <= end_);
+
+ size_t size_ = static_cast<size_t>(end_ - begin_);
+
+ if (size_ <= 1)
+ {
+ // deallocate old buffer
+ if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
+
+ // use internal buffer
+ if (begin_ != end_) _storage = *begin_;
+
+ _begin = &_storage;
+ _end = &_storage + size_;
+ }
+ else
+ {
+ // make heap copy
+ xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
+
+ if (!storage)
+ {
+ #ifdef PUGIXML_NO_EXCEPTIONS
+ return;
+ #else
+ throw std::bad_alloc();
+ #endif
+ }
+
+ // elements are copied bytewise into the new buffer
+ memcpy(storage, begin_, size_ * sizeof(xpath_node));
+
+ // deallocate old buffer (only after the new copy has been made)
+ if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
+
+ // finalize
+ _begin = storage;
+ _end = storage + size_;
+ }
+ }
+
+ // Constructs an empty, unsorted set that points at its inline storage.
+ PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
+ {
+ }
+
+ // Constructs a set of the given type holding a copy of [begin_, end_).
+ PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_), _begin(&_storage), _end(&_storage)
+ {
+ _assign(begin_, end_);
+ }
+
+ // Frees the heap buffer, if one is in use; the inline storage needs no cleanup.
+ PUGI__FN xpath_node_set::~xpath_node_set()
+ {
+ if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
+ }
+
+ // Copy constructor: starts empty on inline storage, then copies the elements of ns.
+ PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(ns._type), _begin(&_storage), _end(&_storage)
+ {
+ _assign(ns._begin, ns._end);
+ }
+
+ // Copy assignment: takes over the type and a copy of the elements of ns.
+ // Self-assignment leaves the set untouched.
+ PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
+ {
+     if (this != &ns)
+     {
+         _type = ns._type;
+         _assign(ns._begin, ns._end);
+     }
+
+     return *this;
+ }
+
+ // Returns the stored ordering type of the set.
+ PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
+ {
+     return _type;
+ }
+
+ // Returns the number of elements in the set.
+ PUGI__FN size_t xpath_node_set::size() const
+ {
+     return static_cast<size_t>(_end - _begin);
+ }
+
+ // True when the set holds no elements.
+ PUGI__FN bool xpath_node_set::empty() const
+ {
+     return _end == _begin;
+ }
+
+ // Returns the element at the given index; the index must be less than size().
+ PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
+ {
+     assert(index < size());
+
+     return *(_begin + index);
+ }
+
+ // Returns a pointer to the first element.
+ PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
+ {
+     return _begin;
+ }
+
+ // Returns a pointer one past the last element.
+ PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
+ {
+     return _end;
+ }
+
+ // Sorts the elements in place via impl::xpath_sort and stores the type it returns.
+ PUGI__FN void xpath_node_set::sort(bool reverse)
+ {
+ _type = impl::xpath_sort(_begin, _end, _type, reverse);
+ }
+
+ // Returns the result of impl::xpath_first for the current range and type
+ // (presumably the first node in document order - confirm against xpath_first).
+ PUGI__FN xpath_node xpath_node_set::first() const
+ {
+ return impl::xpath_first(_begin, _end, _type);
+ }
+
+ PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
+ {
+ }
+
+ PUGI__FN xpath_parse_result::operator bool() const
+ {
+ return error == 0;
+ }
+
+ PUGI__FN const char* xpath_parse_result::description() const
+ {
+ return error ? error : "No error";
+ }
+
+ // Constructs a variable with no type and no successor in its chain.
+ PUGI__FN xpath_variable::xpath_variable():
+     _type(xpath_type_none),
+     _next(0)
+ {
+ }
+
+ // Returns the variable's name, which is stored in the type-specific derived object.
+ // An unknown type trips an assertion and yields 0.
+ PUGI__FN const char_t* xpath_variable::name() const
+ {
+     if (_type == xpath_type_node_set)
+         return static_cast<const impl::xpath_variable_node_set*>(this)->name;
+
+     if (_type == xpath_type_number)
+         return static_cast<const impl::xpath_variable_number*>(this)->name;
+
+     if (_type == xpath_type_string)
+         return static_cast<const impl::xpath_variable_string*>(this)->name;
+
+     if (_type == xpath_type_boolean)
+         return static_cast<const impl::xpath_variable_boolean*>(this)->name;
+
+     assert(!"Invalid variable type");
+     return 0;
+ }
+
+ // Returns the variable's value type.
+ PUGI__FN xpath_value_type xpath_variable::type() const
+ {
+     return _type;
+ }
+
+ // Returns the boolean value, or false when the variable is not a boolean.
+ PUGI__FN bool xpath_variable::get_boolean() const
+ {
+     if (_type != xpath_type_boolean) return false;
+
+     return static_cast<const impl::xpath_variable_boolean*>(this)->value;
+ }
+
+ // Returns the numeric value, or NaN when the variable is not a number.
+ PUGI__FN double xpath_variable::get_number() const
+ {
+     if (_type != xpath_type_number) return impl::gen_nan();
+
+     return static_cast<const impl::xpath_variable_number*>(this)->value;
+ }
+
+ // Returns the string value, or an empty string when the variable is not a
+ // string or has no value stored.
+ PUGI__FN const char_t* xpath_variable::get_string() const
+ {
+     if (_type == xpath_type_string)
+     {
+         const char_t* stored = static_cast<const impl::xpath_variable_string*>(this)->value;
+
+         if (stored) return stored;
+     }
+
+     return PUGIXML_TEXT("");
+ }
+
+ // Returns the node set value, or impl::dummy_node_set when the variable is not a node set.
+ PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
+ {
+     if (_type != xpath_type_node_set) return impl::dummy_node_set;
+
+     return static_cast<const impl::xpath_variable_node_set*>(this)->value;
+ }
+
+ // Stores a boolean value; fails (returns false) when the variable is not a boolean.
+ PUGI__FN bool xpath_variable::set(bool value)
+ {
+     if (_type == xpath_type_boolean)
+     {
+         static_cast<impl::xpath_variable_boolean*>(this)->value = value;
+         return true;
+     }
+
+     return false;
+ }
+
+ // Stores a numeric value; fails (returns false) when the variable is not a number.
+ PUGI__FN bool xpath_variable::set(double value)
+ {
+     if (_type == xpath_type_number)
+     {
+         static_cast<impl::xpath_variable_number*>(this)->value = value;
+         return true;
+     }
+
+     return false;
+ }
+
+ // Stores a copy of the given string in a string variable.
+ // Returns false when the variable is not a string or the copy cannot be allocated;
+ // in both cases the previously stored value is left untouched.
+ // NOTE(review): value is passed to impl::strlength without a null check - callers presumably must pass a valid string.
+ PUGI__FN bool xpath_variable::set(const char_t* value)
+ {
+ if (_type != xpath_type_string) return false;
+
+ impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
+
+ // duplicate string
+ size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
+
+ char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
+ if (!copy) return false;
+
+ memcpy(copy, value, size);
+
+ // replace old string
+ if (var->value) impl::xml_memory::deallocate(var->value);
+ var->value = copy;
+
+ return true;
+ }
+
+ // Stores a copy of the node set; fails (returns false) when the variable is not a node set.
+ PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
+ {
+     if (_type == xpath_type_node_set)
+     {
+         static_cast<impl::xpath_variable_node_set*>(this)->value = value;
+         return true;
+     }
+
+     return false;
+ }
+
+ // Starts with every hash bucket empty.
+ PUGI__FN xpath_variable_set::xpath_variable_set()
+ {
+     const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+
+     for (size_t bucket = 0; bucket < bucket_count; ++bucket)
+         _data[bucket] = 0;
+ }
+
+ // Destroys every variable in every bucket chain.
+ PUGI__FN xpath_variable_set::~xpath_variable_set()
+ {
+     const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+
+     for (size_t bucket = 0; bucket < bucket_count; ++bucket)
+     {
+         xpath_variable* current = _data[bucket];
+
+         while (current)
+         {
+             // read the link before the variable is destroyed
+             xpath_variable* following = current->_next;
+
+             impl::delete_xpath_variable(current->_type, current);
+
+             current = following;
+         }
+     }
+ }
+
+ // Looks up a variable by name in the bucket selected by the name's hash.
+ // Returns 0 when no variable with that name exists.
+ PUGI__FN xpath_variable* xpath_variable_set::find(const char_t* name) const
+ {
+     const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+     size_t bucket = impl::hash_string(name) % bucket_count;
+
+     // walk the chain until a matching name or the end is reached
+     xpath_variable* candidate = _data[bucket];
+
+     while (candidate && !impl::strequal(candidate->name(), name))
+         candidate = candidate->_next;
+
+     return candidate;
+ }
+
+ // Returns the variable with the given name and type, creating it if necessary.
+ // Returns 0 when a variable with that name exists with a different type, or
+ // when a new variable cannot be created.
+ PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
+ {
+     const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+     size_t bucket = impl::hash_string(name) % bucket_count;
+
+     // an existing variable is reused only when its type matches
+     for (xpath_variable* existing = _data[bucket]; existing; existing = existing->_next)
+     {
+         if (!impl::strequal(existing->name(), name)) continue;
+
+         if (existing->type() == type) return existing;
+
+         return 0;
+     }
+
+     xpath_variable* created = impl::new_xpath_variable(type, name);
+
+     if (!created) return 0;
+
+     // link the new variable at the head of its bucket chain
+     created->_type = type;
+     created->_next = _data[bucket];
+
+     _data[bucket] = created;
+
+     return created;
+ }
+
+ // Sets (creating if needed) a boolean variable; false if it cannot be added or assigned.
+ PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
+ {
+     xpath_variable* target = add(name, xpath_type_boolean);
+
+     return target && target->set(value);
+ }
+
+ // Sets (creating if needed) a number variable; false if it cannot be added or assigned.
+ PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
+ {
+     xpath_variable* target = add(name, xpath_type_number);
+
+     return target && target->set(value);
+ }
+
+ // Sets (creating if needed) a string variable; false if it cannot be added or assigned.
+ PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
+ {
+     xpath_variable* target = add(name, xpath_type_string);
+
+     return target && target->set(value);
+ }
+
+ // Sets (creating if needed) a node set variable; false if it cannot be added or assigned.
+ PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
+ {
+     xpath_variable* target = add(name, xpath_type_node_set);
+
+     return target && target->set(value);
+ }
+
+ // Returns the variable with the given name, or 0 if find() does not locate one.
+ PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
+ {
+ return find(name);
+ }
+
+ // Const overload: returns the variable with the given name, or 0 if find() does not locate one.
+ PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
+ {
+ return find(name);
+ }
+
+ // Compiles an XPath expression.
+ // On success _impl holds the compiled query and _result.error is cleared;
+ // when parsing returns no root, _impl stays null.
+ // If the implementation object cannot be created, _result.error is set to
+ // "Out of memory" with PUGIXML_NO_EXCEPTIONS and std::bad_alloc is thrown otherwise.
+ PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
+ {
+ impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
+
+ if (!qimpl)
+ {
+ #ifdef PUGIXML_NO_EXCEPTIONS
+ _result.error = "Out of memory";
+ #else
+ throw std::bad_alloc();
+ #endif
+ }
+ else
+ {
+ // presumably destroys qimpl on scope exit unless release() is called - confirm against buffer_holder
+ impl::buffer_holder impl_holder(qimpl, impl::xpath_query_impl::destroy);
+
+ qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
+
+ if (qimpl->root)
+ {
+ qimpl->root->optimize(&qimpl->alloc);
+
+ // parsing succeeded: take ownership from the holder and clear the error
+ _impl = static_cast<impl::xpath_query_impl*>(impl_holder.release());
+ _result.error = 0;
+ }
+ }
+ }
+
+ // Releases the compiled query; destroy() ignores a null pointer.
+ PUGI__FN xpath_query::~xpath_query()
+ {
+ impl::xpath_query_impl::destroy(_impl);
+ }
+
+ // Returns the return type of the query's root AST node, or xpath_type_none when there is no compiled query.
+ PUGI__FN xpath_value_type xpath_query::return_type() const
+ {
+ if (!_impl) return xpath_type_none;
+
+ return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
+ }
+
+ // Evaluates the query as a boolean with n as the context node.
+ // Returns false when there is no compiled query or, in PUGIXML_NO_EXCEPTIONS
+ // builds, when setjmp on sd.error_handler returns non-zero.
+ PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
+ {
+ if (!_impl) return false;
+
+ impl::xpath_context c(n, 1, 1);
+ impl::xpath_stack_data sd;
+
+ #ifdef PUGIXML_NO_EXCEPTIONS
+ if (setjmp(sd.error_handler)) return false;
+ #endif
+
+ return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
+ }
+
+ // Evaluates the query as a number with n as the context node.
+ // Returns NaN when there is no compiled query or, in PUGIXML_NO_EXCEPTIONS
+ // builds, when setjmp on sd.error_handler returns non-zero.
+ PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
+ {
+ if (!_impl) return impl::gen_nan();
+
+ impl::xpath_context c(n, 1, 1);
+ impl::xpath_stack_data sd;
+
+ #ifdef PUGIXML_NO_EXCEPTIONS
+ if (setjmp(sd.error_handler)) return impl::gen_nan();
+ #endif
+
+ return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
+ }
+
+#ifndef PUGIXML_NO_STL
+ // Evaluates the query as a string with n as the context node and returns the
+ // result as an STL string (built from the result's characters and length).
+ PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
+ {
+ impl::xpath_stack_data sd;
+
+ impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
+
+ return string_t(r.c_str(), r.length());
+ }
+#endif
+
+ // Evaluates the query as a string and copies it into the caller's buffer.
+ // At most capacity characters are written, including the terminating zero;
+ // nothing is written when capacity is 0.
+ // Returns the size needed for the full result, including the terminator.
+ PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
+ {
+     impl::xpath_stack_data sd;
+
+     impl::xpath_string text = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
+
+     const size_t full_size = text.length() + 1;
+
+     if (capacity > 0)
+     {
+         // number of characters to store, terminator included
+         size_t stored = full_size;
+         if (capacity <= full_size) stored = capacity;
+
+         assert(stored > 0);
+
+         memcpy(buffer, text.c_str(), (stored - 1) * sizeof(char_t));
+         buffer[stored - 1] = 0;
+     }
+
+     return full_size;
+ }
+
+ // Evaluates the query as a node set with n as the context node.
+ // Returns an empty set when evaluate_node_set_prepare yields no root or, in
+ // PUGIXML_NO_EXCEPTIONS builds, when setjmp on sd.error_handler returns non-zero.
+ PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
+ {
+ impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
+ if (!root) return xpath_node_set();
+
+ impl::xpath_context c(n, 1, 1);
+ impl::xpath_stack_data sd;
+
+ #ifdef PUGIXML_NO_EXCEPTIONS
+ if (setjmp(sd.error_handler)) return xpath_node_set();
+ #endif
+
+ impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
+
+ // copy the raw result into a self-contained xpath_node_set
+ return xpath_node_set(r.begin(), r.end(), r.type());
+ }
+
+ // Evaluates the query as a node set with n as the context node and returns r.first()
+ // of the raw result (evaluation is requested with nodeset_eval_first).
+ // Returns a default xpath_node when evaluate_node_set_prepare yields no root or, in
+ // PUGIXML_NO_EXCEPTIONS builds, when setjmp on sd.error_handler returns non-zero.
+ PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
+ {
+ impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
+ if (!root) return xpath_node();
+
+ impl::xpath_context c(n, 1, 1);
+ impl::xpath_stack_data sd;
+
+ #ifdef PUGIXML_NO_EXCEPTIONS
+ if (setjmp(sd.error_handler)) return xpath_node();
+ #endif
+
+ impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
+
+ return r.first();
+ }
+
+ // Returns the parse result recorded when the query was constructed.
+ PUGI__FN const xpath_parse_result& xpath_query::result() const
+ {
+     return _result;
+ }
+
+ // Dummy function whose address serves as the "true" value of the safe bool conversion.
+ PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
+ {
+ }
+
+ // Safe bool conversion: non-null when a compiled query is present.
+ PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
+ {
+     if (_impl) return unspecified_bool_xpath_query;
+
+     return 0;
+ }
+
+ // True when no compiled query is present.
+ PUGI__FN bool xpath_query::operator!() const
+ {
+     return _impl == 0;
+ }
+
+ // Compiles the query text and evaluates it against this node, returning a single node.
+ PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
+ {
+     xpath_query compiled(query, variables);
+
+     return select_node(compiled);
+ }
+
+ // Evaluates a precompiled query against this node, returning a single node.
+ PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
+ {
+     return query.evaluate_node(*this);
+ }
+
+ // Compiles the query text and evaluates it against this node, returning a node set.
+ PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
+ {
+     xpath_query compiled(query, variables);
+
+     return select_nodes(compiled);
+ }
+
+ // Evaluates a precompiled query against this node, returning a node set.
+ PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
+ {
+     return query.evaluate_node_set(*this);
+ }
+
+ // Same evaluation as select_node, reached through the select_single_node overload below.
+ PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
+ {
+     xpath_query compiled(query, variables);
+
+     return select_single_node(compiled);
+ }
+
+ // Evaluates a precompiled query against this node, returning a single node.
+ PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
+ {
+     return query.evaluate_node(*this);
+ }
+}
+
+#endif
+
+#ifdef __BORLANDC__
+# pragma option pop
+#endif
+
+// Intel C++ does not properly keep warning state for function templates,
+// so popping warning state at the end of translation unit leads to warnings in the middle.
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+# pragma warning(pop)
+#endif
+
+// Undefine all local macros (makes sure we're not leaking macros in header-only mode)
+#undef PUGI__NO_INLINE
+#undef PUGI__UNLIKELY
+#undef PUGI__STATIC_ASSERT
+#undef PUGI__DMC_VOLATILE
+#undef PUGI__MSVC_CRT_VERSION
+#undef PUGI__NS_BEGIN
+#undef PUGI__NS_END
+#undef PUGI__FN
+#undef PUGI__FN_NO_INLINE
+#undef PUGI__NODETYPE
+#undef PUGI__IS_CHARTYPE_IMPL
+#undef PUGI__IS_CHARTYPE
+#undef PUGI__IS_CHARTYPEX
+#undef PUGI__ENDSWITH
+#undef PUGI__SKIPWS
+#undef PUGI__OPTSET
+#undef PUGI__PUSHNODE
+#undef PUGI__POPNODE
+#undef PUGI__SCANFOR
+#undef PUGI__SCANWHILE
+#undef PUGI__SCANWHILE_UNROLL
+#undef PUGI__ENDSEG
+#undef PUGI__THROW_ERROR
+#undef PUGI__CHECK_ERROR
+
+#endif
+
+/**
+ * Copyright (c) 2006-2014 Arseny Kapoulkine
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
--- /dev/null
+/**
+ * pugixml parser - version 1.5
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Report bugs and download new versions at http://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
+ */
+
+#ifndef PUGIXML_VERSION
+// Define version macro; evaluates to major * 100 + minor * 10 (150 for version 1.5) so that it's safe to use in less-than comparisons
+# define PUGIXML_VERSION 150
+#endif
+
+// Include user configuration file (this can define various configuration macros)
+#include "pugiconfig.hpp"
+
+#ifndef HEADER_PUGIXML_HPP
+#define HEADER_PUGIXML_HPP
+
+// Include stddef.h for size_t and ptrdiff_t
+#include <stddef.h>
+
+// Include exception header for XPath
+#if !defined(PUGIXML_NO_XPATH) && !defined(PUGIXML_NO_EXCEPTIONS)
+# include <exception>
+#endif
+
+// Include STL headers
+#ifndef PUGIXML_NO_STL
+# include <iterator>
+# include <iosfwd>
+# include <string>
+#endif
+
+// Macro for deprecated features
+#ifndef PUGIXML_DEPRECATED
+# if defined(__GNUC__)
+# define PUGIXML_DEPRECATED __attribute__((deprecated))
+# elif defined(_MSC_VER) && _MSC_VER >= 1300
+# define PUGIXML_DEPRECATED __declspec(deprecated)
+# else
+# define PUGIXML_DEPRECATED
+# endif
+#endif
+
+// If no API is defined, assume default
+#ifndef PUGIXML_API
+# define PUGIXML_API
+#endif
+
+// If no API for classes is defined, assume default
+#ifndef PUGIXML_CLASS
+# define PUGIXML_CLASS PUGIXML_API
+#endif
+
+// If no API for functions is defined, assume default
+#ifndef PUGIXML_FUNCTION
+# define PUGIXML_FUNCTION PUGIXML_API
+#endif
+
+// If the platform is known to have long long support, enable long long functions
+#ifndef PUGIXML_HAS_LONG_LONG
+# if defined(__cplusplus) && __cplusplus >= 201103
+# define PUGIXML_HAS_LONG_LONG
+# elif defined(_MSC_VER) && _MSC_VER >= 1400
+# define PUGIXML_HAS_LONG_LONG
+# endif
+#endif
+
+// Character interface macros
+#ifdef PUGIXML_WCHAR_MODE
+# define PUGIXML_TEXT(t) L ## t
+# define PUGIXML_CHAR wchar_t
+#else
+# define PUGIXML_TEXT(t) t
+# define PUGIXML_CHAR char
+#endif
+
+namespace pugi
+{
+ // Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE
+ typedef PUGIXML_CHAR char_t;
+
+#ifndef PUGIXML_NO_STL
+ // String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE
+ typedef std::basic_string<PUGIXML_CHAR, std::char_traits<PUGIXML_CHAR>, std::allocator<PUGIXML_CHAR> > string_t;
+#endif
+}
+
+// The PugiXML namespace
+namespace pugi
+{
+ // Tree node types
+ enum xml_node_type
+ {
+ node_null, // Empty (null) node handle
+ node_document, // A document tree's absolute root
+ node_element, // Element tag, i.e. '<node/>'
+ node_pcdata, // Plain character data, i.e. 'text'
+ node_cdata, // Character data, i.e. '<![CDATA[text]]>'
+ node_comment, // Comment tag, i.e. '<!-- text -->'
+ node_pi, // Processing instruction, i.e. '<?name?>'
+ node_declaration, // Document declaration, i.e. '<?xml version="1.0"?>'
+ node_doctype // Document type declaration, i.e. '<!DOCTYPE doc>'
+ };
+
+ // Parsing options
+
+ // Minimal parsing mode (equivalent to turning all other flags off).
+ // Only elements and PCDATA sections are added to the DOM tree, no text conversions are performed.
+ const unsigned int parse_minimal = 0x0000;
+
+ // This flag determines if processing instructions (node_pi) are added to the DOM tree. This flag is off by default.
+ const unsigned int parse_pi = 0x0001;
+
+ // This flag determines if comments (node_comment) are added to the DOM tree. This flag is off by default.
+ const unsigned int parse_comments = 0x0002;
+
+ // This flag determines if CDATA sections (node_cdata) are added to the DOM tree. This flag is on by default.
+ const unsigned int parse_cdata = 0x0004;
+
+ // This flag determines if plain character data (node_pcdata) that consist only of whitespace are added to the DOM tree.
+ // This flag is off by default; turning it on usually results in slower parsing and more memory consumption.
+ const unsigned int parse_ws_pcdata = 0x0008;
+
+ // This flag determines if character and entity references are expanded during parsing. This flag is on by default.
+ const unsigned int parse_escapes = 0x0010;
+
+ // This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default.
+ const unsigned int parse_eol = 0x0020;
+
+ // This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default.
+ const unsigned int parse_wconv_attribute = 0x0040;
+
+ // This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default.
+ const unsigned int parse_wnorm_attribute = 0x0080;
+
+ // This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default.
+ const unsigned int parse_declaration = 0x0100;
+
+ // This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default.
+ const unsigned int parse_doctype = 0x0200;
+
+ // This flag determines if plain character data (node_pcdata) that is the only child of the parent node and that consists only
+ // of whitespace is added to the DOM tree.
+ // This flag is off by default; turning it on may result in slower parsing and more memory consumption.
+ const unsigned int parse_ws_pcdata_single = 0x0400;
+
+ // This flag determines if leading and trailing whitespace is to be removed from plain character data. This flag is off by default.
+ const unsigned int parse_trim_pcdata = 0x0800;
+
+ // This flag determines if plain character data that does not have a parent node is added to the DOM tree, and if an empty document
+ // is a valid document. This flag is off by default.
+ const unsigned int parse_fragment = 0x1000;
+
+ // The default parsing mode.
+ // Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
+ // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
+ const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol;
+
+ // The full parsing mode.
+ // Nodes of all types are added to the DOM tree, character/reference entities are expanded,
+ // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
+ const unsigned int parse_full = parse_default | parse_pi | parse_comments | parse_declaration | parse_doctype;
+
+ // These flags determine the encoding of input data for XML document
+ enum xml_encoding
+ {
+ encoding_auto, // Auto-detect input encoding using BOM or < / <? detection; use UTF8 if BOM is not found
+ encoding_utf8, // UTF8 encoding
+ encoding_utf16_le, // Little-endian UTF16
+ encoding_utf16_be, // Big-endian UTF16
+ encoding_utf16, // UTF16 with native endianness
+ encoding_utf32_le, // Little-endian UTF32
+ encoding_utf32_be, // Big-endian UTF32
+ encoding_utf32, // UTF32 with native endianness
+ encoding_wchar, // The same encoding wchar_t has (either UTF16 or UTF32)
+ encoding_latin1 // Latin-1 encoding
+ };
+
+ // Formatting flags
+
+ // Indent the nodes that are written to output stream with as many indentation strings as the node's depth in the DOM tree. This flag is on by default.
+ const unsigned int format_indent = 0x01;
+
+ // Write encoding-specific BOM to the output stream. This flag is off by default.
+ const unsigned int format_write_bom = 0x02;
+
+ // Use raw output mode (no indentation and no line breaks are written). This flag is off by default.
+ const unsigned int format_raw = 0x04;
+
+ // Omit default XML declaration even if there is no declaration in the document. This flag is off by default.
+ const unsigned int format_no_declaration = 0x08;
+
+ // Don't escape attribute values and PCDATA contents. This flag is off by default.
+ const unsigned int format_no_escapes = 0x10;
+
+ // Open file using text mode in xml_document::save_file. This enables special character (i.e. new-line) conversions on some systems. This flag is off by default.
+ const unsigned int format_save_file_text = 0x20;
+
+ // The default set of formatting flags.
+ // Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
+ const unsigned int format_default = format_indent;
+
+ // Forward declarations
+ struct xml_attribute_struct;
+ struct xml_node_struct;
+
+ class xml_node_iterator;
+ class xml_attribute_iterator;
+ class xml_named_node_iterator;
+
+ class xml_tree_walker;
+
+ struct xml_parse_result;
+
+ class xml_node;
+
+ class xml_text;
+
+ #ifndef PUGIXML_NO_XPATH
+ class xpath_node;
+ class xpath_node_set;
+ class xpath_query;
+ class xpath_variable_set;
+ #endif
+
+ // Range-based for loop support:
+ // a pair of iterators exposed through begin()/end()
+ template <typename It> class xml_object_range
+ {
+ public:
+     typedef It const_iterator;
+     typedef It iterator;
+
+     // Stores copies of the two iterators delimiting the range
+     xml_object_range(It first, It last): _begin(first), _end(last)
+     {
+     }
+
+     It begin() const
+     {
+         return _begin;
+     }
+
+     It end() const
+     {
+         return _end;
+     }
+
+ private:
+     It _begin;
+     It _end;
+ };
+
+ // Writer interface for node printing (see xml_node::print)
+ // Abstract base class: implementations override write() to receive output chunks.
+ class PUGIXML_CLASS xml_writer
+ {
+ public:
+ // Virtual destructor so writers can be destroyed through a base pointer
+ virtual ~xml_writer() {}
+
+ // Write memory chunk into stream/file/whatever
+ // data points to the chunk, size is its length in bytes
+ virtual void write(const void* data, size_t size) = 0;
+ };
+
+ // xml_writer implementation for FILE*
+ class PUGIXML_CLASS xml_writer_file: public xml_writer
+ {
+ public:
+ // Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio
+ xml_writer_file(void* file);
+
+ // Writes the chunk to the stored file handle
+ virtual void write(const void* data, size_t size);
+
+ private:
+ // The FILE* passed to the constructor, stored as void*
+ void* file;
+ };
+
+ #ifndef PUGIXML_NO_STL
+ // xml_writer implementation for streams
+ class PUGIXML_CLASS xml_writer_stream: public xml_writer
+ {
+ public:
+ // Construct writer from an output stream object
+ // (one overload for narrow char streams, one for wchar_t streams)
+ xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
+ xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
+
+ // Writes the chunk to the stream given at construction
+ virtual void write(const void* data, size_t size);
+
+ private:
+ // presumably only the pointer matching the constructor used is non-null - confirm in the implementation
+ std::basic_ostream<char, std::char_traits<char> >* narrow_stream;
+ std::basic_ostream<wchar_t, std::char_traits<wchar_t> >* wide_stream;
+ };
+ #endif
+
+ // A light-weight handle for manipulating attributes in DOM tree
+ // The handle wraps a single pointer to the internal attribute structure.
+ class PUGIXML_CLASS xml_attribute
+ {
+ friend class xml_attribute_iterator;
+ friend class xml_node;
+
+ private:
+ // Pointer to the wrapped internal attribute structure
+ xml_attribute_struct* _attr;
+
+ // Function pointer type used as the target of the safe bool conversion
+ typedef void (*unspecified_bool_type)(xml_attribute***);
+
+ public:
+ // Default constructor. Constructs an empty attribute.
+ xml_attribute();
+
+ // Constructs attribute from internal pointer
+ explicit xml_attribute(xml_attribute_struct* attr);
+
+ // Safe bool conversion operator
+ operator unspecified_bool_type() const;
+
+ // Borland C++ workaround
+ bool operator!() const;
+
+ // Comparison operators (compares wrapped attribute pointers)
+ bool operator==(const xml_attribute& r) const;
+ bool operator!=(const xml_attribute& r) const;
+ bool operator<(const xml_attribute& r) const;
+ bool operator>(const xml_attribute& r) const;
+ bool operator<=(const xml_attribute& r) const;
+ bool operator>=(const xml_attribute& r) const;
+
+ // Check if attribute is empty
+ bool empty() const;
+
+ // Get attribute name/value, or "" if attribute is empty
+ const char_t* name() const;
+ const char_t* value() const;
+
+ // Get attribute value, or the default value if attribute is empty
+ const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
+
+ // Get attribute value as a number, or the default value if conversion did not succeed or attribute is empty
+ int as_int(int def = 0) const;
+ unsigned int as_uint(unsigned int def = 0) const;
+ double as_double(double def = 0) const;
+ float as_float(float def = 0) const;
+
+ // 64-bit variants, available only when PUGIXML_HAS_LONG_LONG is defined
+ #ifdef PUGIXML_HAS_LONG_LONG
+ long long as_llong(long long def = 0) const;
+ unsigned long long as_ullong(unsigned long long def = 0) const;
+ #endif
+
+ // Get attribute value as bool (returns true if first character is in '1tTyY' set), or the default value if attribute is empty
+ bool as_bool(bool def = false) const;
+
+ // Set attribute name/value (returns false if attribute is empty or there is not enough memory)
+ bool set_name(const char_t* rhs);
+ bool set_value(const char_t* rhs);
+
+ // Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
+ bool set_value(int rhs);
+ bool set_value(unsigned int rhs);
+ bool set_value(double rhs);
+ bool set_value(float rhs);
+ bool set_value(bool rhs);
+
+ // 64-bit variants, available only when PUGIXML_HAS_LONG_LONG is defined
+ #ifdef PUGIXML_HAS_LONG_LONG
+ bool set_value(long long rhs);
+ bool set_value(unsigned long long rhs);
+ #endif
+
+ // Set attribute value (equivalent to set_value without error checking)
+ xml_attribute& operator=(const char_t* rhs);
+ xml_attribute& operator=(int rhs);
+ xml_attribute& operator=(unsigned int rhs);
+ xml_attribute& operator=(double rhs);
+ xml_attribute& operator=(float rhs);
+ xml_attribute& operator=(bool rhs);
+
+ // 64-bit variants, available only when PUGIXML_HAS_LONG_LONG is defined
+ #ifdef PUGIXML_HAS_LONG_LONG
+ xml_attribute& operator=(long long rhs);
+ xml_attribute& operator=(unsigned long long rhs);
+ #endif
+
+ // Get next/previous attribute in the attribute list of the parent node
+ xml_attribute next_attribute() const;
+ xml_attribute previous_attribute() const;
+
+ // Get hash value (unique for handles to the same object)
+ size_t hash_value() const;
+
+ // Get internal pointer
+ xml_attribute_struct* internal_object() const;
+ };
+
+#ifdef __BORLANDC__
+ // Borland C++ workaround: free logical operators taking an xml_attribute handle on the left-hand side (declared only when __BORLANDC__ is defined)
+ bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs);
+ bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs);
+#endif
+
+ // A light-weight handle for manipulating nodes in DOM tree; the only data member is the wrapped xml_node_struct pointer (_root)
+ class PUGIXML_CLASS xml_node
+ {
+ friend class xml_attribute_iterator;
+ friend class xml_node_iterator;
+ friend class xml_named_node_iterator;
+
+ protected:
+ xml_node_struct* _root;
+
+ typedef void (*unspecified_bool_type)(xml_node***);
+
+ public:
+ // Default constructor. Constructs an empty node.
+ xml_node();
+
+ // Constructs node from internal pointer
+ explicit xml_node(xml_node_struct* p);
+
+ // Safe bool conversion operator (lets a handle be tested directly in conditions, as the find_* templates below do)
+ operator unspecified_bool_type() const;
+
+ // Borland C++ workaround
+ bool operator!() const;
+
+ // Comparison operators (compares wrapped node pointers)
+ bool operator==(const xml_node& r) const;
+ bool operator!=(const xml_node& r) const;
+ bool operator<(const xml_node& r) const;
+ bool operator>(const xml_node& r) const;
+ bool operator<=(const xml_node& r) const;
+ bool operator>=(const xml_node& r) const;
+
+ // Check if node is empty.
+ bool empty() const;
+
+ // Get node type
+ xml_node_type type() const;
+
+ // Get node name, or "" if node is empty or it has no name
+ const char_t* name() const;
+
+ // Get node value, or "" if node is empty or it has no value
+ // Note: For <node>text</node> node.value() does not return "text"! Use child_value() or text() methods to access text inside nodes.
+ const char_t* value() const;
+
+ // Get attribute list
+ xml_attribute first_attribute() const;
+ xml_attribute last_attribute() const;
+
+ // Get children list
+ xml_node first_child() const;
+ xml_node last_child() const;
+
+ // Get next/previous sibling in the children list of the parent node
+ xml_node next_sibling() const;
+ xml_node previous_sibling() const;
+
+ // Get parent node
+ xml_node parent() const;
+
+ // Get root of DOM tree this node belongs to
+ xml_node root() const;
+
+ // Get text object for the current node
+ xml_text text() const;
+
+ // Get child, attribute or next/previous sibling with the specified name
+ xml_node child(const char_t* name) const;
+ xml_attribute attribute(const char_t* name) const;
+ xml_node next_sibling(const char_t* name) const;
+ xml_node previous_sibling(const char_t* name) const;
+
+ // Get child value of current node; that is, value of the first child node of type PCDATA/CDATA
+ const char_t* child_value() const;
+
+ // Get child value of child with specified name. Equivalent to child(name).child_value().
+ const char_t* child_value(const char_t* name) const;
+
+ // Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value)
+ bool set_name(const char_t* rhs);
+ bool set_value(const char_t* rhs);
+
+ // Add attribute with specified name. Returns added attribute, or empty attribute on errors.
+ xml_attribute append_attribute(const char_t* name);
+ xml_attribute prepend_attribute(const char_t* name);
+ xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr);
+ xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr);
+
+ // Add a copy of the specified attribute. Returns added attribute, or empty attribute on errors.
+ xml_attribute append_copy(const xml_attribute& proto);
+ xml_attribute prepend_copy(const xml_attribute& proto);
+ xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr);
+ xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr);
+
+ // Add child node with specified type. Returns added node, or empty node on errors.
+ xml_node append_child(xml_node_type type = node_element);
+ xml_node prepend_child(xml_node_type type = node_element);
+ xml_node insert_child_after(xml_node_type type, const xml_node& node);
+ xml_node insert_child_before(xml_node_type type, const xml_node& node);
+
+ // Add child element with specified name. Returns added node, or empty node on errors.
+ xml_node append_child(const char_t* name);
+ xml_node prepend_child(const char_t* name);
+ xml_node insert_child_after(const char_t* name, const xml_node& node);
+ xml_node insert_child_before(const char_t* name, const xml_node& node);
+
+ // Add a copy of the specified node as a child. Returns added node, or empty node on errors.
+ xml_node append_copy(const xml_node& proto);
+ xml_node prepend_copy(const xml_node& proto);
+ xml_node insert_copy_after(const xml_node& proto, const xml_node& node);
+ xml_node insert_copy_before(const xml_node& proto, const xml_node& node);
+
+ // Move the specified node to become a child of this node. Returns moved node, or empty node on errors.
+ xml_node append_move(const xml_node& moved);
+ xml_node prepend_move(const xml_node& moved);
+ xml_node insert_move_after(const xml_node& moved, const xml_node& node);
+ xml_node insert_move_before(const xml_node& moved, const xml_node& node);
+
+ // Remove specified attribute
+ bool remove_attribute(const xml_attribute& a);
+ bool remove_attribute(const char_t* name);
+
+ // Remove specified child
+ bool remove_child(const xml_node& n);
+ bool remove_child(const char_t* name);
+
+ // Parses buffer as an XML document fragment and appends all nodes as children of the current node.
+ // Copies/converts the buffer, so it may be deleted or changed after the function returns.
+ // Note: append_buffer allocates memory that has the lifetime of the owning document; removing the appended nodes does not immediately reclaim that memory.
+ xml_parse_result append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+ // Find attribute using predicate. Returns first attribute for which predicate returned true, or an empty attribute if this node is empty or nothing matches.
+ template <typename Predicate> xml_attribute find_attribute(Predicate pred) const
+ {
+ if (!_root) return xml_attribute();
+
+ for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute())
+ if (pred(attrib))
+ return attrib;
+
+ return xml_attribute();
+ }
+
+ // Find child node using predicate. Only direct children are tested. Returns first child for which predicate returned true, or an empty node if this node is empty or nothing matches.
+ template <typename Predicate> xml_node find_child(Predicate pred) const
+ {
+ if (!_root) return xml_node();
+
+ for (xml_node node = first_child(); node; node = node.next_sibling())
+ if (pred(node))
+ return node;
+
+ return xml_node();
+ }
+
+ // Find node from subtree using predicate. Returns first node from subtree (depth-first, a node is tested before its children), for which predicate returned true; this node itself is never tested, and an empty node is returned if nothing matches.
+ template <typename Predicate> xml_node find_node(Predicate pred) const
+ {
+ if (!_root) return xml_node();
+
+ xml_node cur = first_child();
+
+ while (cur._root && cur._root != _root) // stop when there is nothing left or the walk has climbed back to the search root
+ {
+ if (pred(cur)) return cur;
+
+ if (cur.first_child()) cur = cur.first_child(); // descend first
+ else if (cur.next_sibling()) cur = cur.next_sibling(); // then move sideways
+ else
+ {
+ while (!cur.next_sibling() && cur._root != _root) cur = cur.parent(); // climb until an ancestor has a next sibling, or the search root is reached
+
+ if (cur._root != _root) cur = cur.next_sibling();
+ }
+ }
+
+ return xml_node();
+ }
+
+ // Find child node by attribute name/value
+ xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;
+ xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const;
+
+ #ifndef PUGIXML_NO_STL
+ // Get the absolute node path from root as a text string.
+ string_t path(char_t delimiter = '/') const;
+ #endif
+
+ // Search for a node by path consisting of node names and . or .. elements.
+ xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const;
+
+ // Recursively traverse subtree with xml_tree_walker (note: unlike the other query methods, this one is declared non-const)
+ bool traverse(xml_tree_walker& walker);
+
+ #ifndef PUGIXML_NO_XPATH
+ // Select single node by evaluating XPath query. Returns first node from the resulting node set.
+ xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const;
+ xpath_node select_node(const xpath_query& query) const;
+
+ // Select node set by evaluating XPath query
+ xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const;
+ xpath_node_set select_nodes(const xpath_query& query) const;
+
+ // (deprecated: use select_node instead) Select single node by evaluating XPath query.
+ xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
+ xpath_node select_single_node(const xpath_query& query) const;
+
+ #endif
+
+ // Print subtree using a writer object
+ void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
+
+ #ifndef PUGIXML_NO_STL
+ // Print subtree to stream (the wchar_t stream overload takes no encoding parameter)
+ void print(std::basic_ostream<char, std::char_traits<char> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
+ void print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const;
+ #endif
+
+ // Child nodes iterators
+ typedef xml_node_iterator iterator;
+
+ iterator begin() const;
+ iterator end() const;
+
+ // Attribute iterators
+ typedef xml_attribute_iterator attribute_iterator;
+
+ attribute_iterator attributes_begin() const;
+ attribute_iterator attributes_end() const;
+
+ // Range-based for support
+ xml_object_range<xml_node_iterator> children() const;
+ xml_object_range<xml_named_node_iterator> children(const char_t* name) const;
+ xml_object_range<xml_attribute_iterator> attributes() const;
+
+ // Get node offset in parsed file/string (in char_t units) for debugging purposes
+ ptrdiff_t offset_debug() const;
+
+ // Get hash value (unique for handles to the same object)
+ size_t hash_value() const;
+
+ // Get internal pointer
+ xml_node_struct* internal_object() const;
+ };
+
+#ifdef __BORLANDC__
+ // Borland C++ workaround: free logical operators taking an xml_node handle on the left-hand side (declared only when __BORLANDC__ is defined)
+ bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs);
+ bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs);
+#endif
+
+ // A helper for working with text inside PCDATA/CDATA nodes; the pointer constructor is private, so only the friend class xml_node can create a non-default instance
+ class PUGIXML_CLASS xml_text
+ {
+ friend class xml_node;
+
+ xml_node_struct* _root;
+
+ typedef void (*unspecified_bool_type)(xml_text***);
+
+ explicit xml_text(xml_node_struct* root);
+
+ xml_node_struct* _data_new();
+ xml_node_struct* _data() const;
+
+ public:
+ // Default constructor. Constructs an empty object.
+ xml_text();
+
+ // Safe bool conversion operator
+ operator unspecified_bool_type() const;
+
+ // Borland C++ workaround
+ bool operator!() const;
+
+ // Check if text object is empty
+ bool empty() const;
+
+ // Get text, or "" if object is empty
+ const char_t* get() const;
+
+ // Get text, or the default value if object is empty
+ const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
+
+ // Get text as a number, or the default value if conversion did not succeed or object is empty
+ int as_int(int def = 0) const;
+ unsigned int as_uint(unsigned int def = 0) const;
+ double as_double(double def = 0) const;
+ float as_float(float def = 0) const;
+
+ #ifdef PUGIXML_HAS_LONG_LONG
+ long long as_llong(long long def = 0) const;
+ unsigned long long as_ullong(unsigned long long def = 0) const;
+ #endif
+
+ // Get text as bool (returns true if first character is in '1tTyY' set), or the default value if object is empty
+ bool as_bool(bool def = false) const;
+
+ // Set text (returns false if object is empty or there is not enough memory)
+ bool set(const char_t* rhs);
+
+ // Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
+ bool set(int rhs);
+ bool set(unsigned int rhs);
+ bool set(double rhs);
+ bool set(float rhs);
+ bool set(bool rhs);
+
+ #ifdef PUGIXML_HAS_LONG_LONG
+ bool set(long long rhs);
+ bool set(unsigned long long rhs);
+ #endif
+
+ // Set text (equivalent to set without error checking); one overload per set() overload above
+ xml_text& operator=(const char_t* rhs);
+ xml_text& operator=(int rhs);
+ xml_text& operator=(unsigned int rhs);
+ xml_text& operator=(double rhs);
+ xml_text& operator=(float rhs);
+ xml_text& operator=(bool rhs);
+
+ #ifdef PUGIXML_HAS_LONG_LONG
+ xml_text& operator=(long long rhs);
+ xml_text& operator=(unsigned long long rhs);
+ #endif
+
+ // Get the data node (node_pcdata or node_cdata) for this object
+ xml_node data() const;
+ };
+
+#ifdef __BORLANDC__
+ // Borland C++ workaround: free logical operators taking an xml_text handle on the left-hand side (declared only when __BORLANDC__ is defined)
+ bool PUGIXML_FUNCTION operator&&(const xml_text& lhs, bool rhs);
+ bool PUGIXML_FUNCTION operator||(const xml_text& lhs, bool rhs);
+#endif
+
+ // Child node iterator (a bidirectional iterator over a collection of xml_node); holds both the current node and its parent
+ class PUGIXML_CLASS xml_node_iterator
+ {
+ friend class xml_node;
+
+ private:
+ mutable xml_node _wrap; // mutable: the const operator*/operator-> below return non-const access to it
+ xml_node _parent;
+
+ xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent);
+
+ public:
+ // Iterator traits
+ typedef ptrdiff_t difference_type;
+ typedef xml_node value_type;
+ typedef xml_node* pointer;
+ typedef xml_node& reference;
+
+ #ifndef PUGIXML_NO_STL
+ typedef std::bidirectional_iterator_tag iterator_category;
+ #endif
+
+ // Default constructor
+ xml_node_iterator();
+
+ // Construct an iterator which points to the specified node (non-explicit, so an xml_node converts implicitly)
+ xml_node_iterator(const xml_node& node);
+
+ // Iterator operators
+ bool operator==(const xml_node_iterator& rhs) const;
+ bool operator!=(const xml_node_iterator& rhs) const;
+
+ xml_node& operator*() const;
+ xml_node* operator->() const;
+
+ const xml_node_iterator& operator++();
+ xml_node_iterator operator++(int);
+
+ const xml_node_iterator& operator--();
+ xml_node_iterator operator--(int);
+ };
+
+ // Attribute iterator (a bidirectional iterator over a collection of xml_attribute); holds both the current attribute and the owning node
+ class PUGIXML_CLASS xml_attribute_iterator
+ {
+ friend class xml_node;
+
+ private:
+ mutable xml_attribute _wrap; // mutable: the const operator*/operator-> below return non-const access to it
+ xml_node _parent;
+
+ xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent);
+
+ public:
+ // Iterator traits
+ typedef ptrdiff_t difference_type;
+ typedef xml_attribute value_type;
+ typedef xml_attribute* pointer;
+ typedef xml_attribute& reference;
+
+ #ifndef PUGIXML_NO_STL
+ typedef std::bidirectional_iterator_tag iterator_category;
+ #endif
+
+ // Default constructor
+ xml_attribute_iterator();
+
+ // Construct an iterator which points to the specified attribute; the owning node must be supplied by the caller
+ xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent);
+
+ // Iterator operators
+ bool operator==(const xml_attribute_iterator& rhs) const;
+ bool operator!=(const xml_attribute_iterator& rhs) const;
+
+ xml_attribute& operator*() const;
+ xml_attribute* operator->() const;
+
+ const xml_attribute_iterator& operator++();
+ xml_attribute_iterator operator++(int);
+
+ const xml_attribute_iterator& operator--();
+ xml_attribute_iterator operator--(int);
+ };
+
+ // Named node range helper: the iterator type of the range returned by xml_node::children(const char_t* name)
+ class PUGIXML_CLASS xml_named_node_iterator
+ {
+ friend class xml_node;
+
+ public:
+ // Iterator traits
+ typedef ptrdiff_t difference_type;
+ typedef xml_node value_type;
+ typedef xml_node* pointer;
+ typedef xml_node& reference;
+
+ #ifndef PUGIXML_NO_STL
+ typedef std::bidirectional_iterator_tag iterator_category;
+ #endif
+
+ // Default constructor
+ xml_named_node_iterator();
+
+ // Construct an iterator which points to the specified node; name is the node name the iteration is restricted to
+ xml_named_node_iterator(const xml_node& node, const char_t* name);
+
+ // Iterator operators
+ bool operator==(const xml_named_node_iterator& rhs) const;
+ bool operator!=(const xml_named_node_iterator& rhs) const;
+
+ xml_node& operator*() const;
+ xml_node* operator->() const;
+
+ const xml_named_node_iterator& operator++();
+ xml_named_node_iterator operator++(int);
+
+ const xml_named_node_iterator& operator--();
+ xml_named_node_iterator operator--(int);
+
+ private:
+ mutable xml_node _wrap;
+ xml_node _parent;
+ const char_t* _name; // held as a raw pointer - NOTE(review): presumably the name string must outlive the iterator; confirm in pugixml.cpp
+
+ xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name);
+ };
+
+ // Abstract tree walker class (see xml_node::traverse); for_each is pure virtual and must be overridden, begin/end are optional virtual hooks
+ class PUGIXML_CLASS xml_tree_walker
+ {
+ friend class xml_node;
+
+ private:
+ int _depth;
+
+ protected:
+ // Get current traversal depth
+ int depth() const;
+
+ public:
+ xml_tree_walker();
+ virtual ~xml_tree_walker();
+
+ // Callback that is called when traversal begins
+ virtual bool begin(xml_node& node);
+
+ // Callback that is called for each node traversed (NOTE(review): the bool result presumably tells traverse whether to continue - confirm in pugixml.cpp)
+ virtual bool for_each(xml_node& node) = 0;
+
+ // Callback that is called when traversal ends
+ virtual bool end(xml_node& node);
+ };
+
+ // Parsing status, returned as part of xml_parse_result object (status_ok is explicitly 0; the remaining values follow in declaration order)
+ enum xml_parse_status
+ {
+ status_ok = 0, // No error
+
+ status_file_not_found, // File was not found during load_file()
+ status_io_error, // Error reading from file/stream
+ status_out_of_memory, // Could not allocate memory
+ status_internal_error, // Internal error occurred
+
+ status_unrecognized_tag, // Parser could not determine tag type
+
+ status_bad_pi, // Parsing error occurred while parsing document declaration/processing instruction
+ status_bad_comment, // Parsing error occurred while parsing comment
+ status_bad_cdata, // Parsing error occurred while parsing CDATA section
+ status_bad_doctype, // Parsing error occurred while parsing document type declaration
+ status_bad_pcdata, // Parsing error occurred while parsing PCDATA section
+ status_bad_start_element, // Parsing error occurred while parsing start element tag
+ status_bad_attribute, // Parsing error occurred while parsing element attribute
+ status_bad_end_element, // Parsing error occurred while parsing end element tag
+ status_end_element_mismatch,// There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag)
+
+ status_append_invalid_root, // Unable to append nodes since root type is not node_element or node_document (exclusive to xml_node::append_buffer)
+
+ status_no_document_element // Parsing resulted in a document without element nodes
+ };
+
+ // Parsing result: status code, last parsed offset and source encoding, as returned by the load_*/append_buffer functions
+ struct PUGIXML_CLASS xml_parse_result
+ {
+ // Parsing status (see xml_parse_status)
+ xml_parse_status status;
+
+ // Last parsed offset (in char_t units from start of input data)
+ ptrdiff_t offset;
+
+ // Source document encoding
+ xml_encoding encoding;
+
+ // Default constructor, initializes object to failed state
+ xml_parse_result();
+
+ // Cast to bool operator (non-explicit; NOTE(review): presumably true only when status == status_ok - confirm in pugixml.cpp)
+ operator bool() const;
+
+ // Get error description
+ const char* description() const;
+ };
+
+ // Document class (DOM tree root); publicly derives from xml_node, so all node operations are available on the document itself
+ class PUGIXML_CLASS xml_document: public xml_node
+ {
+ private:
+ char_t* _buffer;
+
+ char _memory[192]; // fixed 192-byte in-object storage - NOTE(review): presumably used by create() for the initial allocation; confirm in pugixml.cpp
+
+ // Non-copyable semantics: copy constructor and copy assignment are declared private (use reset(const xml_document&) to copy contents)
+ xml_document(const xml_document&);
+ const xml_document& operator=(const xml_document&);
+
+ void create();
+ void destroy();
+
+ public:
+ // Default constructor, makes empty document
+ xml_document();
+
+ // Destructor, invalidates all node/attribute handles to this document
+ ~xml_document();
+
+ // Removes all nodes, leaving the empty document
+ void reset();
+
+ // Removes all nodes, then copies the entire contents of the specified document
+ void reset(const xml_document& proto);
+
+ #ifndef PUGIXML_NO_STL
+ // Load document from stream (the wchar_t stream overload takes no encoding parameter).
+ xml_parse_result load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+ xml_parse_result load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options = parse_default);
+ #endif
+
+ // (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied.
+ xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
+
+ // Load document from zero-terminated string. No encoding conversions are applied.
+ xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default);
+
+ // Load document from file (path given as a narrow or wide character string)
+ xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+ xml_parse_result load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+ // Load document from buffer. Copies/converts the buffer, so it may be deleted or changed after the function returns.
+ xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+ // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
+ // You should ensure that buffer data will persist throughout the document's lifetime, and free the buffer memory manually once document is destroyed.
+ xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+ // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
+ // You should allocate the buffer with pugixml allocation function; document will free the buffer when it is no longer needed (you can't use it anymore).
+ xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+ // Save XML document to writer (semantics is slightly different from xml_node::print, see documentation for details).
+ void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+
+ #ifndef PUGIXML_NO_STL
+ // Save XML document to stream (semantics is slightly different from xml_node::print, see documentation for details).
+ void save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+ void save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const;
+ #endif
+
+ // Save XML to file; returns bool (NOTE(review): presumably true on success - confirm in pugixml.cpp)
+ bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+ bool save_file(const wchar_t* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+
+ // Get document element
+ xml_node document_element() const;
+ };
+
+#ifndef PUGIXML_NO_XPATH
+ // XPath query return type (also used as the type tag of xpath_variable)
+ enum xpath_value_type
+ {
+ xpath_type_none, // Unknown type (query failed to compile)
+ xpath_type_node_set, // Node set (xpath_node_set)
+ xpath_type_number, // Number
+ xpath_type_string, // String
+ xpath_type_boolean // Boolean
+ };
+
+ // XPath parsing result: error message plus the offset at which parsing stopped
+ struct PUGIXML_CLASS xpath_parse_result
+ {
+ // Error message (0 if no error)
+ const char* error;
+
+ // Last parsed offset (in char_t units from string start)
+ ptrdiff_t offset;
+
+ // Default constructor, initializes object to failed state
+ xpath_parse_result();
+
+ // Cast to bool operator (non-explicit; NOTE(review): presumably true when error is 0 - confirm in pugixml.cpp)
+ operator bool() const;
+
+ // Get error description
+ const char* description() const;
+ };
+
+ // A single XPath variable; the constructor is protected and xpath_variable_set is a friend (NOTE(review): instances are presumably created only through xpath_variable_set::add - confirm)
+ class PUGIXML_CLASS xpath_variable
+ {
+ friend class xpath_variable_set;
+
+ protected:
+ xpath_value_type _type;
+ xpath_variable* _next;
+
+ xpath_variable();
+
+ // Non-copyable semantics (copy operations are declared in the protected section)
+ xpath_variable(const xpath_variable&);
+ xpath_variable& operator=(const xpath_variable&);
+
+ public:
+ // Get variable name
+ const char_t* name() const;
+
+ // Get variable type
+ xpath_value_type type() const;
+
+ // Get variable value; no type conversion is performed, default value (false, NaN, empty string, empty node set) is returned on type mismatch error
+ bool get_boolean() const;
+ double get_number() const;
+ const char_t* get_string() const;
+ const xpath_node_set& get_node_set() const;
+
+ // Set variable value; no type conversion is performed, false is returned on type mismatch error
+ bool set(bool value);
+ bool set(double value);
+ bool set(const char_t* value);
+ bool set(const xpath_node_set& value);
+ };
+
+ // A set of XPath variables, looked up by name
+ class PUGIXML_CLASS xpath_variable_set
+ {
+ private:
+ xpath_variable* _data[64]; // 64 pointer slots - NOTE(review): together with xpath_variable::_next this looks like a fixed-size bucket table with chaining; confirm in pugixml.cpp
+
+ // Non-copyable semantics (copy operations are declared private)
+ xpath_variable_set(const xpath_variable_set&);
+ xpath_variable_set& operator=(const xpath_variable_set&);
+
+ xpath_variable* find(const char_t* name) const;
+
+ public:
+ // Default constructor/destructor
+ xpath_variable_set();
+ ~xpath_variable_set();
+
+ // Add a new variable or get the existing one, if the types match
+ xpath_variable* add(const char_t* name, xpath_value_type type);
+
+ // Set value of an existing variable; no type conversion is performed, false is returned if there is no such variable or if types mismatch
+ bool set(const char_t* name, bool value);
+ bool set(const char_t* name, double value);
+ bool set(const char_t* name, const char_t* value);
+ bool set(const char_t* name, const xpath_node_set& value);
+
+ // Get existing variable by name (const and non-const overloads)
+ xpath_variable* get(const char_t* name);
+ const xpath_variable* get(const char_t* name) const;
+ };
+
+ // A compiled XPath query object; holds an opaque implementation pointer and the parse result of the compilation
+ class PUGIXML_CLASS xpath_query
+ {
+ private:
+ void* _impl;
+ xpath_parse_result _result;
+
+ typedef void (*unspecified_bool_type)(xpath_query***);
+
+ // Non-copyable semantics (copy operations are declared private)
+ xpath_query(const xpath_query&);
+ xpath_query& operator=(const xpath_query&);
+
+ public:
+ // Construct a compiled object from XPath expression.
+ // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on compilation errors.
+ explicit xpath_query(const char_t* query, xpath_variable_set* variables = 0);
+
+ // Destructor
+ ~xpath_query();
+
+ // Get query expression return type
+ xpath_value_type return_type() const;
+
+ // Evaluate expression as boolean value in the specified context; performs type conversion if necessary.
+ // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+ bool evaluate_boolean(const xpath_node& n) const;
+
+ // Evaluate expression as double value in the specified context; performs type conversion if necessary.
+ // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+ double evaluate_number(const xpath_node& n) const;
+
+ #ifndef PUGIXML_NO_STL
+ // Evaluate expression as string value in the specified context; performs type conversion if necessary.
+ // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+ string_t evaluate_string(const xpath_node& n) const;
+ #endif
+
+ // Evaluate expression as string value in the specified context; performs type conversion if necessary.
+ // At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero).
+ // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+ // If PUGIXML_NO_EXCEPTIONS is defined, returns empty set instead. (NOTE(review): this overload returns size_t, so "empty set" presumably means an empty string result - confirm in pugixml.cpp)
+ size_t evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const;
+
+ // Evaluate expression as node set in the specified context.
+ // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors.
+ // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node set instead.
+ xpath_node_set evaluate_node_set(const xpath_node& n) const;
+
+ // Evaluate expression as node set in the specified context.
+ // Return first node in document order, or empty node if node set is empty.
+ // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors.
+ // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node instead.
+ xpath_node evaluate_node(const xpath_node& n) const;
+
+ // Get parsing result (used to get compilation errors in PUGIXML_NO_EXCEPTIONS mode)
+ const xpath_parse_result& result() const;
+
+ // Safe bool conversion operator
+ operator unspecified_bool_type() const;
+
+ // Borland C++ workaround
+ bool operator!() const;
+ };
+
+ #ifndef PUGIXML_NO_EXCEPTIONS
+ // XPath exception class (only declared when PUGIXML_NO_EXCEPTIONS is not defined); derives from std::exception and carries an xpath_parse_result
+ class PUGIXML_CLASS xpath_exception: public std::exception
+ {
+ private:
+ xpath_parse_result _result;
+
+ public:
+ // Construct exception from parse result
+ explicit xpath_exception(const xpath_parse_result& result);
+
+ // Get error message (overrides std::exception::what; declared non-throwing)
+ virtual const char* what() const throw();
+
+ // Get parse result
+ const xpath_parse_result& result() const;
+ };
+ #endif
+
+ // XPath node class (either xml_node or xml_attribute); stores one handle of each kind
+ class PUGIXML_CLASS xpath_node
+ {
+ private:
+ xml_node _node;
+ xml_attribute _attribute;
+
+ typedef void (*unspecified_bool_type)(xpath_node***);
+
+ public:
+ // Default constructor; constructs empty XPath node
+ xpath_node();
+
+ // Construct XPath node from XML node/attribute (the xml_node constructor is non-explicit; an attribute must be given together with its parent node)
+ xpath_node(const xml_node& node);
+ xpath_node(const xml_attribute& attribute, const xml_node& parent);
+
+ // Get node/attribute, if any
+ xml_node node() const;
+ xml_attribute attribute() const;
+
+ // Get parent of contained node/attribute
+ xml_node parent() const;
+
+ // Safe bool conversion operator
+ operator unspecified_bool_type() const;
+
+ // Borland C++ workaround
+ bool operator!() const;
+
+ // Comparison operators
+ bool operator==(const xpath_node& n) const;
+ bool operator!=(const xpath_node& n) const;
+ };
+
+#ifdef __BORLANDC__
+ // Borland C++ workaround: free logical operators taking an xpath_node on the left-hand side (declared only when __BORLANDC__ is defined)
+ bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs);
+ bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs);
+#endif
+
+ // A fixed-size collection of XPath nodes; copyable, exposes only const access to its elements
+ class PUGIXML_CLASS xpath_node_set
+ {
+ public:
+ // Collection type
+ enum type_t
+ {
+ type_unsorted, // Not ordered
+ type_sorted, // Sorted by document order (ascending)
+ type_sorted_reverse // Sorted by document order (descending)
+ };
+
+ // Constant iterator type
+ typedef const xpath_node* const_iterator;
+
+ // We define non-constant iterator to be the same as constant iterator so that various generic algorithms (i.e. boost foreach) work
+ typedef const xpath_node* iterator;
+
+ // Default constructor. Constructs empty set.
+ xpath_node_set();
+
+ // Constructs a set from iterator range; data is not checked for duplicates and is not sorted according to provided type, so be careful
+ xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted);
+
+ // Destructor
+ ~xpath_node_set();
+
+ // Copy constructor/assignment operator
+ xpath_node_set(const xpath_node_set& ns);
+ xpath_node_set& operator=(const xpath_node_set& ns);
+
+ // Get collection type
+ type_t type() const;
+
+ // Get collection size
+ size_t size() const;
+
+ // Indexing operator (NOTE(review): no bounds-checking contract is visible in this declaration - confirm in pugixml.cpp)
+ const xpath_node& operator[](size_t index) const;
+
+ // Collection iterators
+ const_iterator begin() const;
+ const_iterator end() const;
+
+ // Sort the collection in ascending/descending order by document order
+ void sort(bool reverse = false);
+
+ // Get first node in the collection by document order
+ xpath_node first() const;
+
+ // Check if collection is empty
+ bool empty() const;
+
+ private:
+ type_t _type;
+
+ xpath_node _storage; // single in-object element - NOTE(review): presumably lets a one-node set avoid a separate allocation; confirm in pugixml.cpp
+
+ xpath_node* _begin;
+ xpath_node* _end;
+
+ void _assign(const_iterator begin, const_iterator end);
+ };
+#endif
+
+#ifndef PUGIXML_NO_STL
+ // Convert wide string to UTF8
+ std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
+ std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
+
+ // Convert UTF8 to wide string
+ std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
+ std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const std::basic_string<char, std::char_traits<char>, std::allocator<char> >& str);
+#endif
+
+ // Memory allocation function interface; returns pointer to allocated memory or NULL on failure
+ typedef void* (*allocation_function)(size_t size);
+
+ // Memory deallocation function interface
+ typedef void (*deallocation_function)(void* ptr);
+
+ // Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions.
+ void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
+
+ // Get current memory management functions
+ allocation_function PUGIXML_FUNCTION get_memory_allocation_function();
+ deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function();
+}
+
+#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
+namespace std
+{
+ // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
+ std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_node_iterator&);
+ std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_attribute_iterator&);
+ std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_named_node_iterator&);
+}
+#endif
+
+#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
+namespace std
+{
+ // Workarounds for (non-standard) iterator category detection
+ std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_node_iterator&);
+ std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_attribute_iterator&);
+ std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_named_node_iterator&);
+}
+#endif
+
+#endif
+
+// Make sure implementation is included in header-only mode
+// Use macro expansion in #include to work around QMake (QTBUG-11923)
+#if defined(PUGIXML_HEADER_ONLY) && !defined(PUGIXML_SOURCE)
+# define PUGIXML_SOURCE "pugixml.cpp"
+# include PUGIXML_SOURCE
+#endif
+
+/**
+ * Copyright (c) 2006-2014 Arseny Kapoulkine
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
--- /dev/null
+/* Accuracy.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/Accuracy.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+// Give the float conversion operator a callable name in each target language:
+// __float__ for Python, an explicit ToFloat method for the other targets.
+// These directives must stay ahead of the %include below to take effect.
+#ifdef SWIGPYTHON
+%rename(__float__) PacBio::BAM::Accuracy::operator float;
+#else // C#, R
+%rename(ToFloat) PacBio::BAM::Accuracy::operator float;
+#endif
+
+%include <pbbam/Accuracy.h>
\ No newline at end of file
--- /dev/null
+/* AlignmentPrinter.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/AlignmentPrinter.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+// Wrap the same header that is compiled into the generated wrapper above.
+%include <pbbam/AlignmentPrinter.h>
\ No newline at end of file
--- /dev/null
+/* BamFile.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/BamFile.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+#ifdef SWIGR
+%ignore PacBio::BAM::BamFile::BamFile(const BamFile&);
+#endif
+
+%ignore PacBio::BAM::BamFile::BamFile(BamFile&&);
+%ignore PacBio::BAM::BamFile::operator=;
+
+%include <pbbam/BamFile.h>
--- /dev/null
+/* BamHeader.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/BamHeader.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+// Hide warnings about "internal" being a C# reserved word
+%warnfilter(314) PacBio::BAM::internal;
+
+%ignore PacBio::BAM::BamHeader::BamHeader(BamHeader&&); // move ctors not used
+%ignore PacBio::BAM::BamHeader::operator=; // assignment operators not used
+
+%template(ProgramInfoList) std::vector<PacBio::BAM::ProgramInfo>;
+%template(ReadGroupInfoList) std::vector<PacBio::BAM::ReadGroupInfo>;
+%template(SequenceInfoList) std::vector<PacBio::BAM::SequenceInfo>;
+
+%include <pbbam/BamHeader.h>
--- /dev/null
+/* BamRecord.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/BamRecord.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+// Hide warnings about "internal" being a C# reserved word
+%warnfilter(314) PacBio::BAM::internal;
+
+// hide warnings about unused methods
+%ignore PacBio::BAM::BamRecord::BamRecord(BamRecordImpl&&);
+%ignore PacBio::BAM::BamRecord::BamRecord(BamRecord&&);
+%ignore PacBio::BAM::BamRecord::operator=;
+
+// ignore static methods, to allow member
+%ignore PacBio::BAM::BamRecord::Clipped(const BamRecord&, const ClipType, const PacBio::BAM::Position, const PacBio::BAM::Position);
+%ignore PacBio::BAM::BamRecord::Mapped(const BamRecord&, const int32_t, const Position, const Strand, const Cigar&, const uint8_t);
+
+// C# gets confused by the const and nonconst overloads
+%ignore PacBio::BAM::BamRecord::Impl() const;
+
+#if defined(SWIGR) || defined(SWIGPYTHON)
+%rename("EncodedPkmean") PacBio::BAM::BamRecord::Pkmean(const std::vector<uint16_t>&);
+%rename("EncodedPkmid") PacBio::BAM::BamRecord::Pkmid(const std::vector<uint16_t>&);
+%rename("EncodedPkmean2") PacBio::BAM::BamRecord::Pkmean2(const std::vector<uint16_t>&);
+%rename("EncodedPkmid2") PacBio::BAM::BamRecord::Pkmid2(const std::vector<uint16_t>&);
+#endif
+
+%include <pbbam/BamRecord.h>
--- /dev/null
+/* BamRecordBuilder.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/BamRecordBuilder.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%ignore PacBio::BAM::BamRecordBuilder::BamRecordBuilder(BamRecordBuilder&&); // move ctors not used
+%ignore PacBio::BAM::BamRecordBuilder::operator=;
+
+// rvalue-reference overloads are hidden from the wrappers as well
+%ignore PacBio::BAM::BamRecordBuilder::Reset(BamRecord&&);
+%ignore PacBio::BAM::BamRecordBuilder::Cigar(PacBio::BAM::Cigar&&);
+%ignore PacBio::BAM::BamRecordBuilder::Tags(TagCollection&&);
+
+%include <pbbam/BamRecordBuilder.h>
--- /dev/null
+/* BamRecordImpl.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/BamRecordImpl.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%ignore PacBio::BAM::BamRecordImpl::BamRecordImpl(BamRecordImpl&&);
+%ignore PacBio::BAM::BamRecordImpl::operator=;
+
+%include <pbbam/BamRecordImpl.h>
--- /dev/null
+/* BamRecordTag.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/BamRecordTag.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/BamRecordTag.h>
--- /dev/null
+/* BamTagCodec.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/BamTagCodec.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/BamTagCodec.h>
\ No newline at end of file
--- /dev/null
+/* BamWriter.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/BamWriter.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+// Constructors are named with the full Class::Class scope so the directives
+// target the class members, matching the other interface files in this directory.
+%ignore PacBio::BAM::BamWriter::BamWriter(const BamWriter&); // copy ctor not used
+%ignore PacBio::BAM::BamWriter::BamWriter(BamWriter&&); // move ctor not used
+%ignore PacBio::BAM::BamWriter::operator=; // assignment operators not used
+
+%include <pbbam/BamWriter.h>
--- /dev/null
+# --------------------------------------------- @
+# SWIG
+# --------------------------------------------- @
+
+# general SWIG
+if(${wrapping_swig})
+
+    find_package(SWIG 3.0.5 REQUIRED)
+
+    include(${SWIG_USE_FILE})
+    include_directories(${PacBioBAM_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR})
+
+    #
+    # quash compiler warnings from SWIG-generated code
+    #
+    # Each flag is appended only when the compiler accepts it, so the flag that is
+    # probed must be exactly the flag that is appended.
+    #
+    check_cxx_compiler_flag("-Wno-unused-but-set-variable" HAS_NO_UNUSED_BUT_SET_VARIABLE)
+    if(HAS_NO_UNUSED_BUT_SET_VARIABLE)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-but-set-variable")
+    endif()
+
+    check_cxx_compiler_flag("-Wno-dynamic-class-memaccess" HAS_NO_DYNAMIC_CLASS_MEMACCESS)
+    if(HAS_NO_DYNAMIC_CLASS_MEMACCESS)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-dynamic-class-memaccess")
+    endif()
+
+    check_cxx_compiler_flag("-Wno-unused-parameter" HAS_NO_UNUSED_PARAMETER)
+    if(HAS_NO_UNUSED_PARAMETER)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
+    endif()
+
+    check_cxx_compiler_flag("-Wno-return-local-addr" HAS_NO_RETURN_LOCAL_ADDR)
+    if(HAS_NO_RETURN_LOCAL_ADDR)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-return-local-addr")
+    endif()
+
+    check_cxx_compiler_flag("-Wno-return-type" HAS_NO_RETURN_TYPE)
+    if(HAS_NO_RETURN_TYPE)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-return-type")
+    endif()
+
+    #
+    # SWIG source file properties
+    #
+    set_source_files_properties(PacBioBam.i PROPERTIES CPLUSPLUS ON)
+
+    # SWIGWORDSIZE64 is passed to SWIG only on non-Apple platforms with 8-byte pointers
+    if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 8)
+        set_source_files_properties(PacBioBam.i PROPERTIES SWIG_FLAGS "-DSWIGWORDSIZE64")
+    endif()
+
+endif()
+
+# Per-language wrappers; each is pulled in only when its option is enabled
+
+# Python
+if(PacBioBAM_wrap_python)
+    include(WrapPython.cmake)
+endif()
+
+# R
+if(PacBioBAM_wrap_r)
+    include(WrapR.cmake)
+endif()
+
+# CSharp
+if(PacBioBAM_wrap_csharp)
+    include(WrapCSharp.cmake)
+endif()
--- /dev/null
+/* Cigar.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/Cigar.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%template(CigarOpList) std::vector<PacBio::BAM::CigarOperation>;
+
+%ignore PacBio::BAM::Cigar::Cigar(Cigar&&);
+%ignore PacBio::BAM::Cigar::operator=;
+
+%include <pbbam/Cigar.h>
--- /dev/null
+/* CigarOperation.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/CigarOperation.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%ignore PacBio::BAM::CigarOperation::CigarOperation(CigarOperation&&);
+%ignore PacBio::BAM::CigarOperation::operator=;
+
+#ifdef SWIGR
+%ignore PacBio::BAM::CigarOperation::CigarOperation(CigarOperationType, uint32_t);
+#endif
+
+%include <pbbam/CigarOperation.h>
+
+// enums aren't always named consistently (at least between Mac/clang/swig & Linux/gcc/swig)
+// so, keep this after the main %include so client source can be consistent
+#ifdef SWIGPYTHON
+%pythoncode %{
+try:
+ UNKNOWN_OP
+ ALIGNMENT_MATCH
+ INSERTION
+ DELETION
+ REFERENCE_SKIP
+ SOFT_CLIP
+ HARD_CLIP
+ PADDING
+ SEQUENCE_MATCH
+ SEQUENCE_MISMATCH
+except NameError:
+ UNKNOWN_OP = CigarOperationType_UNKNOWN_OP
+ ALIGNMENT_MATCH = CigarOperationType_ALIGNMENT_MATCH
+ INSERTION = CigarOperationType_INSERTION
+ DELETION = CigarOperationType_DELETION
+ REFERENCE_SKIP = CigarOperationType_REFERENCE_SKIP
+ SOFT_CLIP = CigarOperationType_SOFT_CLIP
+ HARD_CLIP = CigarOperationType_HARD_CLIP
+ PADDING = CigarOperationType_PADDING
+ SEQUENCE_MATCH = CigarOperationType_SEQUENCE_MATCH
+ SEQUENCE_MISMATCH = CigarOperationType_SEQUENCE_MISMATCH
+%}
+#endif
--- /dev/null
+/* ClipType.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/ClipType.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/ClipType.h>
--- /dev/null
+/* DataSet.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/DataSet.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+// move ctors not used
+%ignore PacBio::BAM::DataSet::DataSet(DataSet&&);
+
+// assignment operators not used
+%ignore PacBio::BAM::DataSet::operator=;
+
+#ifdef SWIGCSHARP
+
+// ignore non-const accessors
+%ignore PacBio::BAM::DataSet::Attribute(const std::string&);
+%ignore PacBio::BAM::DataSet::CreatedAt();
+%ignore PacBio::BAM::DataSet::Extensions();
+%ignore PacBio::BAM::DataSet::ExternalResources();
+%ignore PacBio::BAM::DataSet::Filters();
+%ignore PacBio::BAM::DataSet::Format();
+%ignore PacBio::BAM::DataSet::Metadata();
+%ignore PacBio::BAM::DataSet::MetaType();
+%ignore PacBio::BAM::DataSet::ModifiedAt();
+%ignore PacBio::BAM::DataSet::Name();
+%ignore PacBio::BAM::DataSet::Namespaces();
+%ignore PacBio::BAM::DataSet::ResourceId();
+%ignore PacBio::BAM::DataSet::SubDataSets();
+%ignore PacBio::BAM::DataSet::Tags();
+%ignore PacBio::BAM::DataSet::TimeStampedName();
+%ignore PacBio::BAM::DataSet::UniqueId();
+%ignore PacBio::BAM::DataSet::Version();
+
+// disable operator(s)
+%ignore PacBio::BAM::DataSet::operator+=;
+
+#endif // C#
+
+#ifdef SWIGR
+%ignore PacBio::BAM::DataSet::DataSet(const DataSet::TypeEnum type);
+/*%ignore PacBio::BAM::DataSet::DataSet(const BamFile& bamFile);*/
+#endif // R
+
+
+%include <pbbam/DataSet.h>
--- /dev/null
+/* DataSetTypes.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/internal/DataSetElement.h>
+#include <pbbam/internal/DataSetListElement.h>
+#include <pbbam/internal/DataSetBaseTypes.h>
+#include <pbbam/DataSetTypes.h>
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+%}
+
+%ignore PacBio::BAM::internal::DataSetElement::DataSetElement(DataSetElement&&);
+%ignore PacBio::BAM::internal::DataSetElement::operator=;
+%ignore PacBio::BAM::internal::DataSetElement::operator[];
+/*%rename(__getitem__) PacBio::BAM::internal::DataSetElement::operator[];*/
+
+%ignore PacBio::BAM::internal::XmlName::XmlName(XmlName&&);
+%ignore PacBio::BAM::internal::XmlName::operator=;
+
+#ifdef SWIGCSHARP
+
+// ignore non-const accessors
+%ignore PacBio::BAM::DataSetBase::ExternalResources();
+%ignore PacBio::BAM::DataSetBase::Filters();
+%ignore PacBio::BAM::DataSetBase::Metadata();
+%ignore PacBio::BAM::DataSetBase::Namespaces();
+%ignore PacBio::BAM::DataSetBase::SubDataSets();
+%ignore PacBio::BAM::DataSetMetadata::NumRecords();
+%ignore PacBio::BAM::DataSetMetadata::Provenance();
+%ignore PacBio::BAM::DataSetMetadata::TotalLength();
+%ignore PacBio::BAM::ExternalResource::ExternalResources();
+%ignore PacBio::BAM::Filter::Properties();
+%ignore PacBio::BAM::Property::Name();
+%ignore PacBio::BAM::Property::Operator();
+%ignore PacBio::BAM::Property::Value();
+%ignore PacBio::BAM::Provenance::CreatedBy();
+%ignore PacBio::BAM::Provenance::CommonServicesInstanceId();
+%ignore PacBio::BAM::Provenance::CreatorUserId();
+%ignore PacBio::BAM::Provenance::ParentJobId();
+%ignore PacBio::BAM::Provenance::ParentTool();
+%ignore PacBio::BAM::internal::BaseEntityType::Description();
+%ignore PacBio::BAM::internal::BaseEntityType::Extensions();
+%ignore PacBio::BAM::internal::BaseEntityType::Format();
+%ignore PacBio::BAM::internal::BaseEntityType::ModifiedAt();
+%ignore PacBio::BAM::internal::BaseEntityType::Name();
+%ignore PacBio::BAM::internal::BaseEntityType::ResourceId();
+%ignore PacBio::BAM::internal::BaseEntityType::Tags();
+%ignore PacBio::BAM::internal::BaseEntityType::Version();
+%ignore PacBio::BAM::internal::DataEntityType::Checksum();
+%ignore PacBio::BAM::internal::DataEntityType::EncodedValue();
+%ignore PacBio::BAM::internal::DataEntityType::MetaType();
+%ignore PacBio::BAM::internal::DataEntityType::SimpleValue();
+%ignore PacBio::BAM::internal::DataEntityType::TimeStampedName();
+%ignore PacBio::BAM::internal::DataEntityType::UniqueId();
+%ignore PacBio::BAM::internal::DataEntityType::ValueDataType();
+%ignore PacBio::BAM::internal::DataSetElement::Attribute(const std::string&);
+%ignore PacBio::BAM::internal::DataSetElement::Attributes();
+%ignore PacBio::BAM::internal::DataSetElement::Children();
+%ignore PacBio::BAM::internal::DataSetElement::ChildText(const std::string&);
+%ignore PacBio::BAM::internal::DataSetElement::CreatedAt();
+%ignore PacBio::BAM::internal::DataSetElement::Text();
+%ignore PacBio::BAM::internal::IndexedDataType::FileIndices();
+%ignore PacBio::BAM::internal::StrictEntityType::MetaType();
+%ignore PacBio::BAM::internal::StrictEntityType::TimeStampedName();
+%ignore PacBio::BAM::internal::StrictEntityType::UniqueId();
+
+// disable operator(s)
+%ignore PacBio::BAM::DataSetMetadata::operator+=;
+%ignore PacBio::BAM::ExternalResources::operator+=;
+%ignore PacBio::BAM::Filters::operator+=;
+%ignore PacBio::BAM::DataSetBase::operator+=;
+%ignore PacBio::BAM::SubDataSets::operator+=;
+
+#endif // C#
+
+%include <pbbam/internal/DataSetElement.h>
+
+// The list template declared in DataSetListElement.h is DataSetListElement; element access
+// from the target languages goes through the __getitem__ extensions defined further down,
+// so the raw operator[] and the iterator increment operators are hidden.
+%ignore PacBio::BAM::internal::DataSetListElement::operator[];
+%ignore PacBio::BAM::internal::DataSetListIterator::operator++;
+%ignore PacBio::BAM::internal::DataSetListConstIterator::operator++;
+
+%include <pbbam/internal/DataSetListElement.h>
+
+%template(ExtensionListElement) PacBio::BAM::internal::DataSetListElement<PacBio::BAM::ExtensionElement>;
+%template(ExternalResourceListElement) PacBio::BAM::internal::DataSetListElement<PacBio::BAM::ExternalResource>;
+%template(FileIndexListElement) PacBio::BAM::internal::DataSetListElement<PacBio::BAM::FileIndex>;
+%template(FilterListElement) PacBio::BAM::internal::DataSetListElement<PacBio::BAM::Filter>;
+%template(PropertyListElement) PacBio::BAM::internal::DataSetListElement<PacBio::BAM::Property>;
+%template(SubDataSetListElement) PacBio::BAM::internal::DataSetListElement<PacBio::BAM::DataSetBase>;
+
+// Add __getitem__ to each instantiated list type. Both overloads forward to the
+// templated Child<T>() accessor: one takes a numeric index, the other a string.
+// NOTE(review): the string argument is presumably the child element's label - confirm against DataSetElement.h
+%extend PacBio::BAM::internal::DataSetListElement<PacBio::BAM::ExtensionElement> {
+ PacBio::BAM::ExtensionElement& __getitem__(unsigned int i) { return $self->Child<PacBio::BAM::ExtensionElement>(i); }
+ PacBio::BAM::ExtensionElement& __getitem__(const std::string& s) { return $self->Child<PacBio::BAM::ExtensionElement>(s); }
+}
+%extend PacBio::BAM::internal::DataSetListElement<PacBio::BAM::ExternalResource> {
+ PacBio::BAM::ExternalResource& __getitem__(unsigned int i) { return $self->Child<PacBio::BAM::ExternalResource>(i); }
+ PacBio::BAM::ExternalResource& __getitem__(const std::string& s) { return $self->Child<PacBio::BAM::ExternalResource>(s); }
+}
+%extend PacBio::BAM::internal::DataSetListElement<PacBio::BAM::FileIndex> {
+ PacBio::BAM::FileIndex& __getitem__(unsigned int i) { return $self->Child<PacBio::BAM::FileIndex>(i);}
+ PacBio::BAM::FileIndex& __getitem__(const std::string& s) { return $self->Child<PacBio::BAM::FileIndex>(s);}
+}
+%extend PacBio::BAM::internal::DataSetListElement<PacBio::BAM::Filter> {
+ PacBio::BAM::Filter& __getitem__(unsigned int i) { return $self->Child<PacBio::BAM::Filter>(i); }
+ PacBio::BAM::Filter& __getitem__(const std::string& s) { return $self->Child<PacBio::BAM::Filter>(s); }
+}
+%extend PacBio::BAM::internal::DataSetListElement<PacBio::BAM::Property> {
+ PacBio::BAM::Property& __getitem__(unsigned int i) { return $self->Child<PacBio::BAM::Property>(i); }
+ PacBio::BAM::Property& __getitem__(const std::string& s) { return $self->Child<PacBio::BAM::Property>(s); }
+}
+%extend PacBio::BAM::internal::DataSetListElement<PacBio::BAM::DataSetBase> {
+ PacBio::BAM::DataSetBase& __getitem__(unsigned int i) { return $self->Child<PacBio::BAM::DataSetBase>(i); }
+ PacBio::BAM::DataSetBase& __getitem__(const std::string& s) { return $self->Child<PacBio::BAM::DataSetBase>(s); }
+}
+
+%include <pbbam/internal/DataSetBaseTypes.h>
+%include <pbbam/DataSetTypes.h>
--- /dev/null
+/* EntireFileQuery.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/DataSet.h>
+#include <pbbam/internal/QueryBase.h>
+#include <pbbam/EntireFileQuery.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/DataSet.h>
+%include <pbbam/internal/QueryBase.h>
+%include <pbbam/EntireFileQuery.h>
--- /dev/null
+/* FrameEncodingType.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/FrameEncodingType.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/FrameEncodingType.h>
--- /dev/null
+/* Frames.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/Frames.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%ignore PacBio::BAM::Frames::Frames(Frames&&);
+%ignore PacBio::BAM::Frames::Frames(std::vector<uint16_t>&&);
+%ignore PacBio::BAM::Frames::operator=;
+%ignore PacBio::BAM::Frames::Data(std::vector<uint16_t>&&);
+
+%template(UInt8List) std::vector<uint8_t>;
+%template(UInt16List) std::vector<uint16_t>;
+
+%include <pbbam/Frames.h>
--- /dev/null
+/* GenomicInterval.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/GenomicInterval.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%ignore PacBio::BAM::GenomicInterval::operator=;
+
+%include <pbbam/GenomicInterval.h>
--- /dev/null
+/* GenomicIntervalQuery.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/GenomicIntervalQuery.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/GenomicIntervalQuery.h>
--- /dev/null
+/* GroupQuery.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/GroupQuery.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/GroupQuery.h>
\ No newline at end of file
--- /dev/null
+/* GroupQueryBase.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/GroupQueryBase.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+// The increment operators are not wrapped directly; named replacements are
+// added through the %extend blocks below.
+%ignore PacBio::BAM::GroupQueryIterator::operator++;
+%ignore PacBio::BAM::GroupQueryConstIterator::operator++;
+
+%include <pbbam/GroupQueryBase.h>
+
+// incr() forwards to operator++ and value() forwards to operator->,
+// giving target languages plain method names for advancing and dereferencing.
+%extend PacBio::BAM::GroupQueryIterator
+{
+ PacBio::BAM::GroupQueryIterator& incr(void)
+ { return $self->operator++(); }
+
+ std::vector<PacBio::BAM::BamRecord>* value(void)
+ { return $self->operator->(); }
+}
+
+// Same pair of helpers for the const iterator; value() returns a pointer to const.
+%extend PacBio::BAM::GroupQueryConstIterator
+{
+ PacBio::BAM::GroupQueryConstIterator& incr(void)
+ { return $self->operator++(); }
+
+ const std::vector<PacBio::BAM::BamRecord>* value(void) const
+ { return $self->operator->(); }
+}
\ No newline at end of file
--- /dev/null
+/* IRecordWriter.i */
+%module PacBioBam
+%{
+#include <pbbam/IRecordWriter.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/IRecordWriter.h>
--- /dev/null
+/* IndexedFastaReader.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/IndexedFastaReader.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%ignore PacBio::BAM::IndexedFastaReader::operator=; // assignment operators not used
+
+%include <pbbam/IndexedFastaReader.h>
--- /dev/null
+/* Interval.i */
+%module PacBioBam
+%{
+#include <pbbam/Interval.h>
+#include <pbbam/Position.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/Interval.h>
+
+%template(PositionInterval) PacBio::BAM::Interval<PacBio::BAM::Position>;
--- /dev/null
+/* LocalContextFlags.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/LocalContextFlags.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+#ifdef SWIGCSHARP
+%ignore operator|(const LocalContextFlags, const LocalContextFlags);
+#endif
+
+%include <pbbam/LocalContextFlags.h>
--- /dev/null
+/* Orientation.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/Orientation.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/Orientation.h>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{6E414044-5469-48E4-BA14-1B9888875DD5}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <RootNamespace>PacBio.BAM</RootNamespace>
+ <AssemblyName>PacBio.BAM</AssemblyName>
+ <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug</OutputPath>
+ <DefineConstants>DEBUG;</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ <ConsolePause>false</ConsolePause>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>full</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release</OutputPath>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ <ConsolePause>false</ConsolePause>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="*.cs" />
+ </ItemGroup>
+ <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
+</Project>
--- /dev/null
+/* pbbam.i */
+%module PacBioBam
+%{
+
+/*ifdef SWIGR
+ define SWIG_SHARED_PTR_NAMESPACE boost
+ define SWIG_SHARED_PTR_SUBNAMESPACE
+endif*/
+
+#include <pbbam/Config.h>
+#include <string>
+#include <vector>
+%}
+
+#define SWIG_FILE_WITH_INIT
+#define PBBAM_EXPORT
+
+#ifdef SWIGCSHARP
+%rename(Equals) *::operator==;
+%rename(ToBool) *::operator bool;
+%rename(ToInt) *::operator int;
+%rename(ToUint8) *::operator uint8_t;
+
+%ignore *::operator !=;
+
+// Iterator interfaces are not useful outside of C++
+%ignore *::begin;
+%ignore *::end;
+
+%csmethodmodifiers *::ToString() const "public override";
+
+#endif // SWIGCSHARP
+
+/********* SWIG includes ************/
+
+%include "stdint.i"
+%include "std_common.i"
+
+#ifdef SWIGR
+%include "boost_shared_ptr.i"
+#else
+%include "std_shared_ptr.i"
+#endif
+
+%include "std_map.i"
+%include "std_pair.i"
+%include "std_string.i"
+%include "std_vector.i"
+
+ // TODO: can we call these vectors!?
+%template(StringList) std::vector<std::string>;
+%template(IntList) std::vector<int>;
+%template(UIntList) std::vector<unsigned int>;
+%template(FloatList) std::vector<float>;
+%template(ShortList) std::vector<short>;
+%template(CharList) std::vector<char>;
+
+// exception handling
+// Runs each wrapped call ($action) inside a try block. Anything caught as
+// std::exception is reported through SWIG_exception as SWIG_RuntimeError carrying e.what().
+// There is no catch-all clause, so other exception types are not handled here.
+%include "exception.i"
+%exception {
+ try {
+ $action
+ } catch (const std::exception& e) {
+ SWIG_exception(SWIG_RuntimeError, e.what());
+ }
+}
+
+/********* PacBioBAM includes ************/
+
+#ifdef SWIGCSHARP
+ // Renames to play nice with C#
+ // (These are used in the dataset support code, where things like
+ // this happen in C++:
+ //
+ // void Extensions(Extensions x) { ... }
+ //
+ // and this poses problems for C#. Renaming should be fine
+ // as it is doubtful we will refer to these classes by name anyway.)
+ //
+%rename(ExtensionsType) PacBio::BAM::Extensions;
+%rename(ExternalResourcesType) PacBio::BAM::ExternalResources;
+%rename(FiltersType) PacBio::BAM::Filters;
+%rename(SubDataSetsType) PacBio::BAM::SubDataSets;
+%rename(ProvenanceType) PacBio::BAM::Provenance;
+%rename(PropertiesType) PacBio::BAM::Properties;
+%rename(FileIndicesType) PacBio::BAM::FileIndices;
+%rename(ParentToolType) PacBio::BAM::ParentTool;
+%rename(CigarType) PacBio::BAM::Cigar;
+#endif
+
+// Basic types
+%include "Accuracy.i"
+%include "BamRecordTag.i"
+%include "CigarOperation.i"
+%include "ClipType.i"
+%include "FrameEncodingType.i"
+%include "Interval.i"
+%include "LocalContextFlags.i"
+%include "Orientation.i"
+%include "Position.i"
+%include "PulseBehavior.i"
+%include "QualityValue.i"
+%include "RecordType.i"
+%include "Strand.i"
+%include "Tag.i"
+
+// Basic type aggregates
+%include "Cigar.i"
+%include "GenomicInterval.i"
+%include "QualityValues.i"
+%include "TagCollection.i"
+
+// keep this guy after the other basic types, hacky but works
+%include "Frames.i"
+
+// Header API
+%include "ProgramInfo.i"
+%include "ReadGroupInfo.i"
+%include "SequenceInfo.i"
+%include "BamHeader.i"
+
+// SAM/BAM format
+%include "IRecordWriter.i"
+%include "BamFile.i"
+%include "BamRecordImpl.i"
+%include "BamRecord.i"
+%include "BamRecordBuilder.i"
+%include "BamTagCodec.i"
+%include "BamWriter.i"
+%include "SamTagCodec.i"
+%include "SamWriter.i"
+
+// DataSet
+%include "DataSetTypes.i"
+%include "DataSet.i"
+
+// Query/iterator API
+%include "QueryBase.i"
+%include "EntireFileQuery.i"
+%include "GenomicIntervalQuery.i"
+%include "ZmwQuery.i"
+%include "ZmwGroupQuery.i"
+
+// PBI
+%include "PbiFile.i"
+%include "PbiRawData.i"
+%include "PbiIndex.i"
+
+// FASTA
+%include "IndexedFastaReader.i"
+
+// Virtual record API
+%include "VirtualRegion.i"
+%include "VirtualZmwBamRecord.i"
+%include "ZmwReadStitcher.i"
+%include "WhitelistedZmwReadStitcher.i"
+
+// Virtual record API - deprecated
+%include "VirtualPolymeraseBamRecord.i"
+%include "VirtualPolymeraseReader.i"
+%include "ZmwWhitelistVirtualReader.i"
--- /dev/null
+/* PbiFile.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/PbiFile.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/PbiFile.h>
\ No newline at end of file
--- /dev/null
+/* PbiIndex.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/PbiIndex.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+/*%ignore PacBio::BAM::IndexResultBlock::IndexResultBlock();*/
+%ignore PacBio::BAM::IndexResultBlock::IndexResultBlock(size_t, size_t);
+
+%ignore PacBio::BAM::PbiIndex::PbiIndex(PbiIndex&&); // move ctors not used
+%ignore PacBio::BAM::PbiIndex::operator=; // assignment operators not used
+// The class name is case-sensitive: a misspelt scope makes the directive match nothing.
+%ignore PacBio::BAM::PbiIndex::VirtualFileOffsets;
+
+%include <pbbam/PbiIndex.h>
--- /dev/null
+/* PbiRawData.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/PbiRawData.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+// move ctors not used
+%ignore PacBio::BAM::PbiRawBarcodeData::PbiRawBarcodeData(PbiRawBarcodeData&&);
+%ignore PacBio::BAM::PbiRawMappedData::PbiRawMappedData(PbiRawMappedData&&);
+%ignore PacBio::BAM::PbiReferenceEntry::PbiReferenceEntry(PbiReferenceEntry&&);
+%ignore PacBio::BAM::PbiRawReferenceData::PbiRawReferenceData(PbiRawReferenceData&&);
+%ignore PacBio::BAM::PbiRawBasicData::PbiRawBasicData(PbiRawBasicData&&);
+%ignore PacBio::BAM::PbiRawData::PbiRawData(PbiRawData&&);
+
+// assignment operators not used
+%ignore PacBio::BAM::PbiRawBarcodeData::operator=;
+%ignore PacBio::BAM::PbiRawMappedData::operator=;
+%ignore PacBio::BAM::PbiReferenceEntry::operator=;
+%ignore PacBio::BAM::PbiRawReferenceData::operator=;
+%ignore PacBio::BAM::PbiRawBasicData::operator=;
+%ignore PacBio::BAM::PbiRawData::operator=;
+
+#ifdef SWIGCSHARP
+// ignore non-const accessors
+%ignore PacBio::BAM::PbiRawData::BarcodeData();
+%ignore PacBio::BAM::PbiRawData::MappedData();
+%ignore PacBio::BAM::PbiRawData::ReferenceData();
+%ignore PacBio::BAM::PbiRawData::BasicData();
+#endif // C#
+
+%include <pbbam/PbiRawData.h>
--- /dev/null
+/* Position.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/Position.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/Position.h>
--- /dev/null
+/* ProgramInfo.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/ProgramInfo.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%ignore PacBio::BAM::ProgramInfo::ProgramInfo(ProgramInfo&&);
+%ignore PacBio::BAM::ProgramInfo::operator=;
+%ignore PacBio::BAM::ProgramInfo::ToSam(const ProgramInfo&); // ignore static method, to allow member
+
+%include <pbbam/ProgramInfo.h>
\ No newline at end of file
--- /dev/null
+/* PulseBehavior.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/PulseBehavior.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/PulseBehavior.h>
--- /dev/null
+/* QualityValue.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/QualityValue.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%ignore PacBio::BAM::QualityValue::operator=;
+
+#ifdef SWIGPYTHON
+%rename(__int__) PacBio::BAM::QualityValue::operator uint8_t;
+#else // R, C#
+%rename(ToInt) PacBio::BAM::QualityValue::operator uint8_t;
+#endif
+
+%include <pbbam/QualityValue.h>
+
--- /dev/null
+/* QualityValues.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/QualityValues.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%template(QualityValueList) std::vector<PacBio::BAM::QualityValue>;
+
+%ignore PacBio::BAM::QualityValues::operator=;
+%ignore PacBio::BAM::QualityValues::QualityValues(QualityValues&&);
+%ignore PacBio::BAM::QualityValues::QualityValues(std::vector<QualityValue>&&);
+
+%include <pbbam/QualityValues.h>
--- /dev/null
+/* QueryBase.i */
+
+%module PacBioBam
+
+%{
+
+#include <pbbam/internal/QueryBase.h>
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+
+%ignore PacBio::BAM::QueryIterator::operator++;
+%ignore PacBio::BAM::QueryConstIterator::operator++;
+
+%ignore PacBio::BAM::internal::QueryIterator::operator++;
+%ignore PacBio::BAM::internal::QueryConstIterator::operator++;
+
+// C# only: declare the single-record query proxy as IEnumerable / IEnumerable<BamRecord>
+// and inject the enumerator methods that implement those interfaces.
+%typemap(csinterfaces) PacBio::BAM::internal::QueryBase<BamRecord> "global::System.Collections.IEnumerable\n, global::System.Collections.Generic.IEnumerable<PacBio.BAM.BamRecord>\n";
+%typemap(cscode) PacBio::BAM::internal::QueryBase<BamRecord>
+%{
+
+    // Typed enumerator: steps the wrapped const-iterator from cbegin() until it
+    // compares Equals() to cend(), yielding the current record at each position.
+    public global::System.Collections.Generic.IEnumerator<PacBio.BAM.BamRecord> GetEnumerator()
+    {
+        var cursor = this.cbegin();
+        var sentinel = this.cend();
+        for (; !cursor.Equals(sentinel); cursor.incr())
+        {
+            yield return cursor.value();
+        }
+    }
+
+    // Non-generic IEnumerable entry point; delegates to the typed enumerator.
+    global::System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
+    {
+        return GetEnumerator();
+    }
+
+%}
+
+namespace std {
+ %template(BamRecordList) std::vector<PacBio::BAM::BamRecord>;
+}
+
+// C# only: declare the grouped query proxy as IEnumerable / IEnumerable<BamRecordList>
+// and inject the enumerator methods that implement those interfaces.
+%typemap(csinterfaces) PacBio::BAM::internal::QueryBase<std::vector<BamRecord> > "global::System.Collections.IEnumerable\n, global::System.Collections.Generic.IEnumerable<BamRecordList>\n";
+%typemap(cscode) PacBio::BAM::internal::QueryBase<std::vector<BamRecord> >
+%{
+
+    // Typed enumerator: steps the wrapped const-iterator from cbegin() until it
+    // compares Equals() to cend(), yielding the current record list at each position.
+    public global::System.Collections.Generic.IEnumerator<BamRecordList> GetEnumerator()
+    {
+        var cursor = this.cbegin();
+        var sentinel = this.cend();
+        for (; !cursor.Equals(sentinel); cursor.incr())
+        {
+            yield return cursor.value();
+        }
+    }
+
+    // Non-generic IEnumerable entry point; delegates to the typed enumerator.
+    global::System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
+    {
+        return GetEnumerator();
+    }
+
+%}
+
+%include <pbbam/internal/QueryBase.h>
+
+%template(IQuery) PacBio::BAM::internal::QueryBase<BamRecord>;
+%template(IGroupQuery) PacBio::BAM::internal::QueryBase<std::vector<BamRecord> >;
+
+// Iterator template instantiations used by the C# enumerators and the Python Iterate() helper
+%template(BamQueryIteratorBase) PacBio::BAM::internal::QueryIteratorBase<BamRecord>;
+%template(BamGroupQueryIteratorBase) PacBio::BAM::internal::QueryIteratorBase<std::vector<BamRecord> >;
+%template(BamQueryIterator) PacBio::BAM::internal::QueryIterator<BamRecord>;
+%template(BamGroupQueryIterator) PacBio::BAM::internal::QueryIterator<std::vector<BamRecord> >;
+%template(BamQueryConstIterator) PacBio::BAM::internal::QueryConstIterator<BamRecord>;
+%template(BamGroupQueryConstIterator) PacBio::BAM::internal::QueryConstIterator<std::vector<BamRecord> >;
+
+// Iterator API
+#ifdef SWIGPYTHON
+%pythoncode %{
+def Iterate(c):
+    """Generator over a wrapped query/container.
+
+    Walks from c.begin() until the iterator no longer compares unequal to
+    c.end(), yielding value() and then calling incr() at each step.
+    """
+    cursor = c.begin()
+    stop = c.end()
+    while cursor != stop:
+        yield cursor.value()
+        cursor.incr()
+%}
+#endif
+
+// Named stand-ins for the iterator operators, callable from the target languages.
+%extend PacBio::BAM::internal::QueryIterator<BamRecord>
+{
+    // Forwards to the iterator's pre-increment operator.
+    PacBio::BAM::internal::QueryIterator<BamRecord>& incr()
+    {
+        return ++(*$self);
+    }
+
+    // Forwards to operator->, giving a pointer to the current record.
+    PacBio::BAM::BamRecord* value()
+    {
+        return $self->operator->();
+    }
+}
+
+// Named stand-ins for the const-iterator operators, callable from the target languages.
+%extend PacBio::BAM::internal::QueryConstIterator<BamRecord>
+{
+    // Forwards to the iterator's pre-increment operator.
+    PacBio::BAM::internal::QueryConstIterator<BamRecord>& incr()
+    {
+        return ++(*$self);
+    }
+
+    // Forwards to operator->, giving a read-only pointer to the current record.
+    const PacBio::BAM::BamRecord* value() const
+    {
+        return $self->operator->();
+    }
+}
+
+// Named stand-ins for the grouped-iterator operators, callable from the target languages.
+%extend PacBio::BAM::internal::QueryIterator<std::vector<BamRecord> >
+{
+    // Forwards to the iterator's pre-increment operator.
+    PacBio::BAM::internal::QueryIterator<std::vector<BamRecord> >& incr()
+    {
+        return ++(*$self);
+    }
+
+    // Forwards to operator->, giving a pointer to the current record list.
+    std::vector<PacBio::BAM::BamRecord>* value()
+    {
+        return $self->operator->();
+    }
+}
+
+// Named stand-ins for the grouped const-iterator operators, callable from the target languages.
+%extend PacBio::BAM::internal::QueryConstIterator<std::vector<BamRecord> >
+{
+    // Forwards to the iterator's pre-increment operator.
+    PacBio::BAM::internal::QueryConstIterator<std::vector<BamRecord> >& incr()
+    {
+        return ++(*$self);
+    }
+
+    // Forwards to operator->, giving a read-only pointer to the current record list.
+    const std::vector<PacBio::BAM::BamRecord>* value() const
+    {
+        return $self->operator->();
+    }
+}
--- /dev/null
+/* ReadGroupInfo.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/ReadGroupInfo.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%ignore PacBio::BAM::ReadGroupInfo::ReadGroupInfo(ReadGroupInfo&&);
+%ignore PacBio::BAM::ReadGroupInfo::operator=;
+%ignore PacBio::BAM::ReadGroupInfo::ToSam(const ReadGroupInfo&);
+
+%include <pbbam/ReadGroupInfo.h>
\ No newline at end of file
--- /dev/null
+/* RecordType.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/RecordType.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/RecordType.h>
--- /dev/null
+/* SamTagCodec.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/SamTagCodec.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/SamTagCodec.h>
\ No newline at end of file
--- /dev/null
+/* SamWriter.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/SamWriter.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+// Hide the special member functions from the wrappers.
+// Constructors are spelled Class::Class(...), as in the other interface files of
+// this module, so that the %ignore rules name the constructors themselves.
+%ignore PacBio::BAM::SamWriter::SamWriter(const SamWriter&);  // copy ctor not used
+%ignore PacBio::BAM::SamWriter::SamWriter(SamWriter&&);       // move ctor not used
+%ignore PacBio::BAM::SamWriter::operator=;                    // assignment operators not used
+
+%include <pbbam/SamWriter.h>
--- /dev/null
+/* SequenceInfo.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/SequenceInfo.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%ignore PacBio::BAM::SequenceInfo::SequenceInfo(SequenceInfo&&);
+%ignore PacBio::BAM::SequenceInfo::operator=;
+%ignore PacBio::BAM::SequenceInfo::ToSam(const SequenceInfo&); // ignore static method, to allow member
+
+%include <pbbam/SequenceInfo.h>
\ No newline at end of file
--- /dev/null
+/* Strand.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/Strand.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/Strand.h>
\ No newline at end of file
--- /dev/null
+/* Tag.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/Tag.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%ignore PacBio::BAM::Tag::Tag(Tag&&);
+%ignore PacBio::BAM::Tag::operator=;
+
+#if defined(SWIGR) || defined(SWIGPYTHON)
+
+%ignore PacBio::BAM::Tag::Tag(int8_t value);
+%ignore PacBio::BAM::Tag::Tag(uint8_t value);
+%ignore PacBio::BAM::Tag::Tag(int16_t value);
+%ignore PacBio::BAM::Tag::Tag(uint16_t value);
+%ignore PacBio::BAM::Tag::Tag(int32_t value);
+%ignore PacBio::BAM::Tag::Tag(uint32_t value);
+%ignore PacBio::BAM::Tag::Tag(float value);
+%ignore PacBio::BAM::Tag::Tag(const std::string& value);
+%ignore PacBio::BAM::Tag::Tag(const std::vector<int8_t>& value);
+%ignore PacBio::BAM::Tag::Tag(const std::vector<uint8_t>& value);
+%ignore PacBio::BAM::Tag::Tag(const std::vector<int16_t>& value);
+%ignore PacBio::BAM::Tag::Tag(const std::vector<uint16_t>& value);
+%ignore PacBio::BAM::Tag::Tag(const std::vector<int32_t>& value);
+%ignore PacBio::BAM::Tag::Tag(const std::vector<uint32_t>& value);
+%ignore PacBio::BAM::Tag::Tag(const std::vector<float>& value);
+
+%extend PacBio::BAM::Tag {
+
+    // Named factory helpers for the R and Python wrappers. The typed Tag
+    // constructors are %ignore'd for these targets, so each helper accepts plain
+    // int input (or a vector of int), casts every value to the requested element
+    // type, and constructs the Tag from the result.
+    //
+    // NOTE(review): FromFloat and FromFloatArray also take int input, so fractional
+    // values cannot be passed through them - confirm whether float parameters were intended.
+
+    // --- scalar helpers ---
+    PacBio::BAM::Tag FromInt8(int x)   { return PacBio::BAM::Tag(static_cast<int8_t>(x));   }
+    PacBio::BAM::Tag FromUInt8(int x)  { return PacBio::BAM::Tag(static_cast<uint8_t>(x));  }
+    PacBio::BAM::Tag FromInt16(int x)  { return PacBio::BAM::Tag(static_cast<int16_t>(x));  }
+    PacBio::BAM::Tag FromUInt16(int x) { return PacBio::BAM::Tag(static_cast<uint16_t>(x)); }
+    PacBio::BAM::Tag FromInt32(int x)  { return PacBio::BAM::Tag(static_cast<int32_t>(x));  }
+    PacBio::BAM::Tag FromUInt32(int x) { return PacBio::BAM::Tag(static_cast<uint32_t>(x)); }
+    PacBio::BAM::Tag FromFloat(int x)  { return PacBio::BAM::Tag(static_cast<float>(x));    }
+
+    // --- array helpers ---
+    // Each builds a vector of the element type named in the helper, so the Tag is
+    // constructed from the matching std::vector<T> overload.
+
+    PacBio::BAM::Tag FromInt8Array(const std::vector<int>& v)
+    {
+        std::vector<int8_t> result;
+        result.reserve(v.size());
+        for (const int x : v)
+            result.push_back(static_cast<int8_t>(x));
+        return PacBio::BAM::Tag(result);
+    }
+
+    PacBio::BAM::Tag FromUInt8Array(const std::vector<int>& v)
+    {
+        std::vector<uint8_t> result;
+        result.reserve(v.size());
+        for (const int x : v)
+            result.push_back(static_cast<uint8_t>(x));
+        return PacBio::BAM::Tag(result);
+    }
+
+    PacBio::BAM::Tag FromInt16Array(const std::vector<int>& v)
+    {
+        std::vector<int16_t> result;
+        result.reserve(v.size());
+        for (const int x : v)
+            result.push_back(static_cast<int16_t>(x));
+        return PacBio::BAM::Tag(result);
+    }
+
+    PacBio::BAM::Tag FromUInt16Array(const std::vector<int>& v)
+    {
+        // element type must be uint16_t (was int16_t, yielding a signed 16-bit array)
+        std::vector<uint16_t> result;
+        result.reserve(v.size());
+        for (const int x : v)
+            result.push_back(static_cast<uint16_t>(x));
+        return PacBio::BAM::Tag(result);
+    }
+
+    PacBio::BAM::Tag FromInt32Array(const std::vector<int>& v)
+    {
+        // element type must be int32_t (was int16_t, truncating every value)
+        std::vector<int32_t> result;
+        result.reserve(v.size());
+        for (const int x : v)
+            result.push_back(static_cast<int32_t>(x));
+        return PacBio::BAM::Tag(result);
+    }
+
+    PacBio::BAM::Tag FromUInt32Array(const std::vector<int>& v)
+    {
+        // element type must be uint32_t (was int16_t, truncating every value)
+        std::vector<uint32_t> result;
+        result.reserve(v.size());
+        for (const int x : v)
+            result.push_back(static_cast<uint32_t>(x));
+        return PacBio::BAM::Tag(result);
+    }
+
+    PacBio::BAM::Tag FromFloatArray(const std::vector<int>& v)
+    {
+        // element type must be float (was int16_t, producing an integer array)
+        std::vector<float> result;
+        result.reserve(v.size());
+        for (const int x : v)
+            result.push_back(static_cast<float>(x));
+        return PacBio::BAM::Tag(result);
+    }
+}
+#endif // SWIGR || SWIGPYTHON
+
+%include <pbbam/Tag.h>
--- /dev/null
+/* TagCollection.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/TagCollection.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%template(TagCollectionType) std::map<std::string, PacBio::BAM::Tag>;
+
+%include <pbbam/TagCollection.h>
\ No newline at end of file
--- /dev/null
+/* VirtualPolymeraseBamRecord.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/virtual/VirtualRegionType.h>
+#include <pbbam/virtual/VirtualRegion.h>
+#include <pbbam/virtual/VirtualPolymeraseBamRecord.h>
+#include <pbbam/virtual/VirtualZmwBamRecord.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+typedef PacBio::BAM::VirtualZmwBamRecord VirtualPolymeraseBamRecord;
+%}
+
+///*%ignore PacBio::BAM::VirtualPolymeraseBamRecord::VirtualPolymeraseBamRecord(const VirtualPolymeraseBamRecord&);*/
+//%ignore PacBio::BAM::VirtualPolymeraseBamRecord::VirtualPolymeraseBamRecord(VirtualPolymeraseBamRecord&&);
+//%ignore PacBio::BAM::VirtualPolymeraseBamRecord::operator=;
+
+//// disabled - can't get it to work right (at least in Python)
+//// but the same info is available (& correct) from record.VirtualRegionsTable(regionType)
+//%ignore PacBio::BAM::VirtualPolymeraseBamRecord::VirtualRegionsMap;
+
+//%template(VirtualRegionList) std::vector<PacBio::BAM::VirtualRegion>;
+//%template(VirtualRegionsMap) std::map<PacBio::BAM::VirtualRegionType, std::vector<PacBio::BAM::VirtualRegion> >;
+
+%include <pbbam/virtual/VirtualPolymeraseBamRecord.h>
+%include <pbbam/virtual/VirtualZmwBamRecord.h>
+typedef PacBio::BAM::VirtualZmwBamRecord VirtualPolymeraseBamRecord;
+
+#ifdef SWIGPYTHON
+%pythoncode %{
+
+VirtualPolymeraseBamRecord = VirtualZmwBamRecord
+
+%}
+#endif
--- /dev/null
+/* VirtualPolymeraseReader.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/virtual/VirtualPolymeraseReader.h>
+#include <pbbam/virtual/ZmwReadStitcher.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+typedef PacBio::BAM::ZmwReadStitcher VirtualPolymeraseReader;
+%}
+
+%include <pbbam/virtual/VirtualPolymeraseReader.h>
+%include <pbbam/virtual/ZmwReadStitcher.h>
+typedef PacBio::BAM::ZmwReadStitcher VirtualPolymeraseReader;
+
+#ifdef SWIGPYTHON
+%pythoncode %{
+
+VirtualPolymeraseReader = ZmwReadStitcher
+
+%}
+#endif
\ No newline at end of file
--- /dev/null
+/* VirtualRegion.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/virtual/VirtualRegionType.h>
+#include <pbbam/virtual/VirtualRegion.h>
+#include <map>
+#include <vector>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%ignore PacBio::BAM::VirtualRegion::VirtualRegion(VirtualRegion&&);
+%ignore PacBio::BAM::VirtualRegion::operator=;
+
+%include <pbbam/virtual/VirtualRegionType.h>
+%include <pbbam/virtual/VirtualRegion.h>
--- /dev/null
+/* VirtualZmwBamRecord.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/virtual/VirtualRegionType.h>
+#include <pbbam/virtual/VirtualRegion.h>
+#include <pbbam/virtual/VirtualZmwBamRecord.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%feature("valuewrapper") PacBio::BAM::VirtualZmwBamRecord;
+
+/*%ignore PacBio::BAM::VirtualZmwBamRecord::VirtualZmwBamRecord(const VirtualZmwBamRecord&);*/
+%ignore PacBio::BAM::VirtualZmwBamRecord::VirtualZmwBamRecord(VirtualZmwBamRecord&&);
+%ignore PacBio::BAM::VirtualZmwBamRecord::operator=;
+
+// disabled - can't get it to work right (at least in Python)
+// but the same info is available (& correct) from record.VirtualRegionsTable(regionType)
+%ignore PacBio::BAM::VirtualZmwBamRecord::VirtualRegionsMap;
+
+%template(VirtualRegionList) std::vector<PacBio::BAM::VirtualRegion>;
+%template(VirtualRegionsMap) std::map<PacBio::BAM::VirtualRegionType, std::vector<PacBio::BAM::VirtualRegion> >;
+
+%include <pbbam/virtual/VirtualZmwBamRecord.h>
\ No newline at end of file
--- /dev/null
+/* WhitelistedZmwReadStitcher.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/virtual/WhitelistedZmwReadStitcher.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/virtual/WhitelistedZmwReadStitcher.h>
\ No newline at end of file
--- /dev/null
+
+# Builds the C# SWIG wrapper for pbbam and the managed PacBio.BAM assembly.
+# Reads PacBioBAM_LibDir, PacBioBAM_TestsDir, PacBioBAM_SwigSourceDir,
+# PacBioBAM_LIBRARIES, Htslib_LibDir and HTSLIB_LIBRARIES, none of which are set here.
+find_package(CSharp REQUIRED)
+include (${CSHARP_USE_FILE})
+
+# Output locations for the wrapper library, the built assembly, and the C# test sources.
+# NativeLibraryPaths is a colon-separated list that is not referenced again in this
+# script - presumably substituted into one of the configured templates below; verify.
+set(PacBioBAM_CSharpLibDir ${PacBioBAM_LibDir}/csharp/PacBio.BAM)
+set(PacBioBAM_CSharpDLL ${PacBioBAM_CSharpLibDir}/bin/Debug/PacBio.BAM.dll)
+set(CSharpTestRootDir ${PacBioBAM_TestsDir}/src/CSharp)
+set(NativeLibraryPaths ${PacBioBAM_CSharpLibDir}:${PacBioBAM_LibDir}:${Htslib_LibDir})
+
+#
+# Create SWIG wrapper
+#
+file(MAKE_DIRECTORY ${PacBioBAM_CSharpLibDir})
+set(CMAKE_SWIG_OUTDIR ${PacBioBAM_CSharpLibDir}) # ensure any swig files in lib/csharp
+# Process PacBioBam.i in C++ mode and pass "-namespace PacBio.BAM" to SWIG.
+set_source_files_properties(
+ PacBioBam.i PROPERTIES
+ CPLUSPLUS ON
+ SWIG_FLAGS "-namespace;PacBio.BAM")
+swig_add_module(PacBioBam csharp PacBioBam.i)
+swig_link_libraries(PacBioBam ${PacBioBAM_LIBRARIES}) # add any C# libs you need from <Find|Use>CSharp.cmake
+set_target_properties(
+ ${SWIG_MODULE_PacBioBam_REAL_NAME} # ensure wrapper lib in lib/csharp
+ PROPERTIES
+ LIBRARY_OUTPUT_DIRECTORY ${PacBioBAM_CSharpLibDir}
+)
+# Build the pbbam target before the wrapper library.
+add_dependencies(${SWIG_MODULE_PacBioBam_REAL_NAME} pbbam)
+
+#
+# Write a csproj, then shell out to build and check the assembly---
+# can't get it working nicely in CMake yet
+#
+# Generated files: the csproj goes next to the wrapper, TestPbbam.cs is written
+# back into the test source directory, and buildAssembly.sh is given a relative
+# destination path.
+configure_file(
+ ${PacBioBAM_SwigSourceDir}/PacBio.BAM.csproj.in
+ ${PacBioBAM_CSharpLibDir}/PacBio.BAM.csproj)
+configure_file(
+ ${CSharpTestRootDir}/TestPbbam.cs.in
+ ${CSharpTestRootDir}/TestPbbam.cs)
+configure_file(
+ ${CSharpTestRootDir}/buildAssembly.sh.in
+ buildAssembly.sh)
+# Produce the DLL by running the generated script once the wrapper library exists;
+# HTSLIB_LIBRARIES is passed as the script's single argument.
+add_custom_command(
+ OUTPUT ${PacBioBAM_CSharpDLL}
+ DEPENDS ${SWIG_MODULE_PacBioBam_REAL_NAME}
+ COMMAND bash ./buildAssembly.sh "${HTSLIB_LIBRARIES}"
+)
+# Part of the default build (ALL): requests the DLL produced by the command above.
+add_custom_target(CSharpAssembly ALL DEPENDS ${PacBioBAM_CSharpDLL})
--- /dev/null
+
+# Builds the Python SWIG wrapper for pbbam, adds an import smoke check to every
+# build, and (optionally) registers the Python unit tests.
+# Reads PacBioBAM_LibDir, PacBioBAM_TestsDir, PacBioBAM_LIBRARIES and
+# PacBioBAM_build_tests, none of which are set here.
+
+# setup
+# NOTE(review): PYTHON_INCLUDE_PATH is believed to be the legacy spelling of
+# PYTHON_INCLUDE_DIRS in FindPythonLibs - confirm before relying on it with newer CMake.
+find_package(PythonLibs REQUIRED)
+include_directories(${PYTHON_INCLUDE_PATH})
+set(PacBioBAM_PythonLibDir ${PacBioBAM_LibDir}/python)
+set(PythonTestRootDir ${PacBioBAM_TestsDir}/src/python)
+
+# create wrapper
+file(MAKE_DIRECTORY ${PacBioBAM_PythonLibDir})
+set(CMAKE_SWIG_OUTDIR ${PacBioBAM_PythonLibDir}) # put PacBioBam.py in lib/python
+
+swig_add_module(PacBioBam python PacBioBam.i)
+swig_link_libraries(PacBioBam ${PacBioBAM_LIBRARIES} ${PYTHON_LIBRARIES})
+set_target_properties(
+ ${SWIG_MODULE_PacBioBam_REAL_NAME} # put _PacBioBam.so in lib/python
+ PROPERTIES
+ LIBRARY_OUTPUT_DIRECTORY ${PacBioBAM_PythonLibDir}
+)
+# Linking against the pbbam target is used here in place of the add_dependencies
+# call that is commented out below.
+#add_dependencies(${SWIG_MODULE_PacBioBam_REAL_NAME} pbbam ${PacBioBAM_LIBRARIES})
+target_link_libraries(${SWIG_MODULE_PacBioBam_REAL_NAME} pbbam)
+
+# simple "wrapper worked" check
+# this is run every build, to check importing from Python, but does NOT run full Python-side unit tests
+# NOTE(review): the "VAR=value command" form below presumably relies on a POSIX-style
+# shell executing the build commands - verify for non-Unix generators.
+add_custom_target(
+ check_swig_python
+ ALL
+ "PYTHONPATH=${PacBioBAM_PythonLibDir}" python check_swig.py
+ COMMENT "Checking Python wrapper"
+ WORKING_DIRECTORY ${PythonTestRootDir}
+)
+# Run the check only after the wrapper library has been built.
+add_dependencies(check_swig_python ${SWIG_MODULE_PacBioBam_REAL_NAME})
+
+# unit tests
+if(PacBioBAM_build_tests)
+
+ # configure data directory info
+ configure_file(
+ ${PythonTestRootDir}/test/config.py.in
+ ${PythonTestRootDir}/test/config.py
+ )
+
+ # test runner
+ # Runs test_pbbam.py from the Python test directory with PYTHONPATH pointing at
+ # the wrapper output directory.
+ add_test(
+ NAME PythonUnitTests
+ WORKING_DIRECTORY ${PythonTestRootDir}
+ COMMAND "python" test_pbbam.py
+ )
+ set_tests_properties(
+ PythonUnitTests
+ PROPERTIES
+ ENVIRONMENT "PYTHONPATH=${PacBioBAM_PythonLibDir}"
+ )
+
+endif() # unit tests
+
--- /dev/null
+# Builds the R SWIG wrapper for pbbam, adds a load smoke check to every build,
+# and (optionally) registers the R unit tests.
+# Reads PacBioBAM_LibDir, PacBioBAM_TestsDir, PacBioBAM_LIBRARIES and
+# PacBioBAM_build_tests, none of which are set here.
+
+# setup
+set(R_INCLUDE_DIR_HINT /mnt/software/r/R/3.1.1/usr/share/R/include) # TODO: hard-coded hint for now, clean up later
+find_package(R REQUIRED)
+include_directories(${R_INCLUDE_DIR})
+set(PacBioBAM_RLibDir ${PacBioBAM_LibDir}/R)
+set(RTestRootDir ${PacBioBAM_TestsDir}/src/R)
+
+# Suppress warnings from generated code
+# The flag is appended to CMAKE_CXX_FLAGS only when the compiler accepts it.
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag("-Wno-unused-parameter" HAS_NO_UNUSED_PARAMETER)
+if(HAS_NO_UNUSED_PARAMETER)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
+endif()
+
+# SWIG R does not support PBBAM_SHARED_PTR, but it does support boost::shared_ptr
+# So force boost if we're wrapping for R.
+add_definitions(-DPBBAM_USE_BOOST_SHARED_PTR)
+
+# create wrapper & library
+file(MAKE_DIRECTORY ${PacBioBAM_RLibDir})
+set(CMAKE_SWIG_OUTDIR ${PacBioBAM_RLibDir}) # put PacBioBam.R wrapper in lib/R
+swig_add_module(PacBioBam r PacBioBam.i)
+swig_link_libraries(PacBioBam ${PacBioBAM_LIBRARIES})
+# Link the R libraries as well, when find_package(R) reported any.
+if(R_LIBRARIES)
+ swig_link_libraries(PacBioBam ${R_LIBRARIES})
+endif()
+
+# make sure the library is named "PacBioBam.so" explicitly
+# no "lib" prefix... that gets in the way of the name lookups between SWIG/R
+# and make sure library ends up in lib/R
+# NOTE(review): SONAME does not look like a built-in CMake target property, so the
+# empty PREFIX is presumably what actually controls the file name - confirm.
+set_target_properties(
+ ${SWIG_MODULE_PacBioBam_REAL_NAME}
+ PROPERTIES
+ LIBRARY_OUTPUT_DIRECTORY ${PacBioBAM_RLibDir}
+ RUNTIME_OUTPUT_DIRECTORY ${PacBioBAM_RLibDir}
+ SONAME PacBioBam.so
+ PREFIX ""
+)
+# Build the pbbam target before the wrapper library.
+add_dependencies(${SWIG_MODULE_PacBioBam_REAL_NAME} pbbam)
+
+# simple "wrapper worked" check
+# The check script is generated from its .in template into the R test source directory.
+configure_file(
+ ${RTestRootDir}/check_swig.R.in
+ ${RTestRootDir}/check_swig.R
+)
+
+# Part of the default build (ALL): feeds the generated script to R on stdin,
+# running from the wrapper output directory.
+add_custom_target(
+ check_swig_R
+ ALL
+ "R" --slave --no-save < ${RTestRootDir}/check_swig.R
+ COMMENT "Checking R wrapper"
+ WORKING_DIRECTORY ${PacBioBAM_RLibDir}
+)
+# Run the check only after the wrapper library has been built.
+add_dependencies(check_swig_R ${SWIG_MODULE_PacBioBam_REAL_NAME})
+
+# unit tests
+if(PacBioBAM_build_tests)
+
+ # configure script
+ configure_file(
+ ${RTestRootDir}/test_pbbam.sh.in
+ ${RTestRootDir}/test_pbbam.sh
+ )
+
+ # test runner
+ # Runs the generated shell script from the wrapper output directory.
+ add_test(
+ NAME RUnitTests
+ COMMAND "sh" ${RTestRootDir}/test_pbbam.sh
+ WORKING_DIRECTORY ${PacBioBAM_RLibDir}
+ )
+endif()
--- /dev/null
+/* ZmwGroupQuery.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/internal/QueryBase.h>
+#include <pbbam/ZmwGroupQuery.h>
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/internal/QueryBase.h>
+%include <pbbam/ZmwGroupQuery.h>
--- /dev/null
+/* ZmwQuery.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/ZmwQuery.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/ZmwQuery.h>
\ No newline at end of file
--- /dev/null
+/* ZmwReadStitcher.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/virtual/ZmwReadStitcher.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+%}
+
+%include <pbbam/virtual/ZmwReadStitcher.h>
\ No newline at end of file
--- /dev/null
+/* ZmwWhitelistVirtualReader.i */
+
+%module PacBioBam
+
+%{
+#include <pbbam/virtual/ZmwWhitelistVirtualReader.h>
+#include <pbbam/virtual/WhitelistedZmwReadStitcher.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+typedef PacBio::BAM::WhitelistedZmwReadStitcher ZmwWhitelistVirtualReader;
+%}
+
+%include <pbbam/virtual/ZmwWhitelistVirtualReader.h>
+%include <pbbam/virtual/WhitelistedZmwReadStitcher.h>
+typedef PacBio::BAM::WhitelistedZmwReadStitcher ZmwWhitelistVirtualReader;
+
+#ifdef SWIGPYTHON
+%pythoncode %{
+
+ZmwWhitelistVirtualReader = WhitelistedZmwReadStitcher
+
+%}
+#endif
--- /dev/null
+
+# Unit-test build: everything below is skipped unless PacBioBAM_build_tests is set.
+# Reads PacBioBAM_RootDir, PacBioBAM_TestsDir, PacBioBAM_BinDir, PacBioBAM_INCLUDE_DIRS,
+# PacBioBAM_CXX_FLAGS, PacBioBAM_wrap_r and GeneratedTestDataDir, none of which are set here.
+if(PacBioBAM_build_tests)
+
+ # setup GoogleTest
+ # GTEST_SRC_DIR may be supplied by the caller; otherwise use the prebuilt
+ # location when it exists, else fall back to a sibling "gtest" directory.
+ if (NOT GTEST_SRC_DIR)
+ set(PREBUILT_GTEST_SRC ${PacBioBAM_RootDir}/../../../../prebuilt.tmpout/gtest/gtest_1.7.0/)
+ if(EXISTS ${PREBUILT_GTEST_SRC})
+ set(GTEST_SRC_DIR ${PREBUILT_GTEST_SRC})
+ else()
+ set(GTEST_SRC_DIR ${PacBioBAM_RootDir}/../gtest) # keep old fallback behavior for external builds, for now at least
+ endif()
+ endif()
+ add_subdirectory(${GTEST_SRC_DIR} external/gtest/build)
+
+ # generate paths/values used by unit tests
+ # Both files are written under ${CMAKE_BINARY_DIR}/generated.
+ configure_file(
+ ${PacBioBAM_TestsDir}/src/TestData.h.in
+ ${CMAKE_BINARY_DIR}/generated/TestData.h
+ )
+ configure_file(
+ ${PacBioBAM_TestsDir}/data/group/group.fofn.in
+ ${CMAKE_BINARY_DIR}/generated/group.fofn
+ )
+
+ # grab PacBioBAM unit test source files
+ # files.cmake is expected to define PacBioBAMTest_H and PacBioBAMTest_CPP.
+ include(files.cmake)
+ set(SOURCES
+ ${PacBioBAMTest_H}
+ ${PacBioBAMTest_CPP}
+ )
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PacBioBAM_CXX_FLAGS}")
+
+ # define unit test executable
+ add_definitions(-DPBBAM_TESTING)
+ if(MSVC)
+ # VS2012+ pooh-pooh's Derek's "#define private public" trick
+ add_definitions(-D_ALLOW_KEYWORD_MACROS)
+ endif()
+
+ if(PacBioBAM_wrap_r)
+ # SWIG R does not support std::shared_ptr, but it does support boost::shared_ptr
+ # So force boost if we're wrapping for R.
+ add_definitions(-DPBBAM_USE_BOOST_SHARED_PTR)
+ endif()
+
+ # The test binary is placed in PacBioBAM_BinDir; the generated-header directory
+ # is listed first among its include paths.
+ add_executable(test_pbbam ${SOURCES})
+ set_target_properties(test_pbbam PROPERTIES
+ RUNTIME_OUTPUT_DIRECTORY ${PacBioBAM_BinDir}
+ )
+ target_include_directories(test_pbbam
+ PUBLIC
+ ${CMAKE_BINARY_DIR}/generated
+ ${PacBioBAM_INCLUDE_DIRS}
+ ${gtest_SOURCE_DIR}/include
+ ${gtest_SOURCE_DIR}
+ )
+
+ # generate test data
+ # Runs generate_data.py from tests/scripts with the source data directory and
+ # GeneratedTestDataDir as its two arguments.
+ add_custom_target(
+ generate_test_data
+ WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
+ COMMAND "python" generate_data.py
+ ${PacBioBAM_TestsDir}/data/
+ ${GeneratedTestDataDir}
+ )
+
+ # add unit tests to test framework
+ add_test(
+ NAME UnitTests
+ WORKING_DIRECTORY ${PacBioBAM_BinDir}
+ COMMAND test_pbbam
+ )
+ # Generate the test data before building the test executable.
+ add_dependencies(test_pbbam generate_test_data)
+ target_link_libraries(test_pbbam
+ pbbam
+ ${CMAKE_THREAD_LIBS_INIT} # quirky pthreads
+ gtest
+ gtest_main
+ )
+endif() # PacBioBAM_build_tests
--- /dev/null
+@HD VN:1.3.1 SO:coordinate pb:3.0.3
+@SQ SN:lambda_NEB3011 LN:48502 M5:a1319ff90e994c8190a4fe6569d0822a
+@RG ID:0d7b28fa PL:PACBIO DS:READTYPE=SUBREAD;BINDINGKIT=100-619-300;SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100 PU:singleInsertion PM:SEQUEL
+@PG ID:bwa PN:bwa VN:0.7.10-r1017-dirty CL:bwa mem lambdaNEB.fa singleInsertion.fasta
+singleInsertion/100/0_49 2048 lambda_NEB3011 5211 60 3H8=1D19=1I21=59H * 0 0 GGCTGCAGGTACAGCGGTCAGGAGGCCAATTGATGCCGGACTGGCTGAT * NM:i:2 MD:Z:8^T40 AS:i:34 XS:i:0 RG:Z:0d7b28fa SA:Z:lambda_NEB3011,9378,+,52S37=2D10=1I11=,60,3; qe:i:49 qs:i:0 np:i:1 zm:i:100 rq:f:0.6 sn:B:f,0.2,0.2,0.2,0.2
+singleInsertion/200/0_49 2048 lambda_NEB3011 5211 60 3H8=1D19=1I21=59H * 0 0 GGCTGCAGGTACAGCGGTCAGGAGGCCAATTGATGCCGGACTGGCTGAT * NM:i:2 MD:Z:8^T40 AS:i:34 XS:i:0 RG:Z:0d7b28fa SA:Z:lambda_NEB3011,9378,-,37=2D10=1I11=52S,60,3; qe:i:49 qs:i:0 np:i:1 zm:i:200 rq:f:0.6 sn:B:f,0.2,0.2,0.2,0.2
+singleInsertion/100/0_111 0 lambda_NEB3011 9378 60 52S37=2D10=1I11= * 0 0 TTTGGCTGCAGGTACAGCGGTCAGGAGGCCAATTGATGCCGGACTGGCTGATAAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGAGCAGCACGGTAAACAGCGGCAA * NM:i:3 MD:Z:37^TC21 AS:i:43 XS:i:0 RG:Z:0d7b28fa SA:Z:lambda_NEB3011,5211,+,3S8=1D19=1I21=59S,60,2; qe:i:111 qs:i:0 np:i:1 zm:i:100 rq:f:0.6 sn:B:f,0.2,0.2,0.2,0.2
+singleInsertion/100/0_111 16 lambda_NEB3011 9378 60 37=2D10=1I11=52S * 0 0 AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGAGCAGCACGGTAAACAGCGGCAAATCAGCCAGTCCGGCATCAATTGGCCTCCTGACCGCTGTACCTGCAGCCAAA * NM:i:3 MD:Z:37^TC21 AS:i:43 XS:i:0 RG:Z:0d7b28fa SA:Z:lambda_NEB3011,5211,+,3S8=1D19=1I21=59S,60,2; qe:i:111 qs:i:0 np:i:1 zm:i:100 rq:f:0.6 sn:B:f,0.2,0.2,0.2,0.2
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c"
+ TimeStampedName="subreadset_150304_231155"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Name="DataSet_SubreadSet"
+ Tags=""
+ Version="3.0.0"
+ CreatedAt="2015-01-27T09:00:01">
+<pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5194"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5197"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5198"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource><pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5195"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5196"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="movie" Operator="=" Value="m150404_101626_42267_c100807920800000001823174110291514_s1_p0"/>
+ <pbbase:Property Name="zm" Operator="lt" Value="1816"/>
+ </pbbase:Properties>
+ </pbds:Filter>
+</pbds:Filters>
+</pbds:SubreadSet>
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:AlignmentSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource Name="First Alignments BAM" Description="Points to an example Alignments BAM file." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:///mnt/path/to/alignments0.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:///mnt/path/to/alignments0.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource Name="Second Alignments BAM" Description="Points to another example Alignments BAM file, by relative path." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:./alignments1.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:///mnt/path/to/alignments1.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ <pbds:DataSets>
+ <pbds:DataSet UniqueId="ab95d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" Name="HighQuality Read Alignments">
+ <pbds:Filters> <!-- These Filters are in addition to those above. This provides a means to subset and label the parent DataSet further. -->
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="rq" Value="0.85" Operator=">"/>
+ </pbbase:Properties>
+ </pbds:Filter>
+ </pbds:Filters>
+ </pbds:DataSet>
+ <pbds:DataSet UniqueId="ac95d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" Name="Alignments to chromosome 1">
+ <pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="RNAME" Value="chr1" Operator="=="/>
+ </pbbase:Properties>
+ </pbds:Filter>
+ </pbds:Filters>
+ </pbds:DataSet>
+ </pbds:DataSets>
+</pbds:AlignmentSet>
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:AlignmentSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource Name="First Alignments BAM" Description="Points to an example Alignments BAM file." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:///mnt/path/to/alignments2.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:///mnt/path/to/alignments2.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource Name="Second Alignments BAM" Description="Points to another example Alignments BAM file, by relative path." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:./alignments3.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:///mnt/path/to/alignments3.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ <pbds:DataSets>
+ <pbds:DataSet UniqueId="ab95d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" Name="HighQuality Read Alignments">
+ <pbds:Filters> <!-- These Filters are in addition to those above. This provides a means to subset and label the parent DataSet further. -->
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="rq" Value="0.85" Operator=">"/>
+ </pbbase:Properties>
+ </pbds:Filter>
+ </pbds:Filters>
+ </pbds:DataSet>
+ <pbds:DataSet UniqueId="ac95d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" Name="Alignments to chromosome 1">
+ <pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="RNAME" Value="chr1" Operator="=="/>
+ </pbbase:Properties>
+ </pbds:Filter>
+ </pbds:Filters>
+ </pbds:DataSet>
+ </pbds:DataSets>
+</pbds:AlignmentSet>
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:AlignmentSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource Name="First Alignments BAM" Description="Points to an example Alignments BAM file." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:///mnt/path/to/alignments2.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:///mnt/path/to/alignments2.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource Name="Second Alignments BAM" Description="Points to another example Alignments BAM file, by relative path." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:./alignments3.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:///mnt/path/to/alignments3.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ <pbds:DataSets>
+ <pbds:DataSet UniqueId="ab95d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" Name="HighQuality Read Alignments">
+ <pbds:Filters> <!-- These Filters are in addition to those above. This provides a means to subset and label the parent DataSet further. -->
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="rq" Value="0.75" Operator=">"/>
+ </pbbase:Properties>
+ </pbds:Filter>
+ </pbds:Filters>
+ </pbds:DataSet>
+ <pbds:DataSet UniqueId="ac95d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" Name="Alignments to chromosome 1">
+ <pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="RNAME" Value="chr1" Operator="=="/>
+ </pbbase:Properties>
+ </pbds:Filter>
+ </pbds:Filters>
+ </pbds:DataSet>
+ </pbds:DataSets>
+</pbds:AlignmentSet>
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:AlignmentSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource Name="First Alignments BAM" Description="Points to an example Alignments BAM file." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:///mnt/path/to/alignments0.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:///mnt/path/to/alignments0.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource Name="Second Alignments BAM" Description="Points to another example Alignments BAM file, by relative path." MetaType="AlignmentFile.AlignmentBamFile" ResourceId="file:./alignments1.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:///mnt/path/to/alignments1.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ <pbds:DataSets>
+ <pbds:DataSet UniqueId="ab95d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" Name="HighQuality Read Alignments">
+ <pbds:Filters> <!-- These Filters are in addition to those above. This provides a means to subset and label the parent DataSet further. -->
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="rq" Value="0.85" Operator=">"/>
+ </pbbase:Properties>
+ </pbds:Filter>
+ </pbds:Filters>
+ </pbds:DataSet>
+ <pbds:DataSet UniqueId="ac95d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" Name="Alignments to chromosome 1">
+ <pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="RNAME" Value="chr1" Operator="=="/>
+ </pbbase:Properties>
+ </pbds:Filter>
+ </pbds:Filters>
+ </pbds:DataSet>
+ </pbds:DataSets>
+</pbds:AlignmentSet>
--- /dev/null
+<?xml version='1.0' encoding='UTF-8'?>
+<pbds:DataSet CreatedAt="2015-05-13T10:58:26" MetaType="PacBio.DataSet.DataSet" Name="" Tags="" UniqueId="30f72098-bc5b-e06b-566c-8b28dda909a8" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource ResourceId="file:tests/data/bam_mapping_1.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:tests/data/bam_mapping_1.bam.bai"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource ResourceId="file:tests/data/bam_mapping_2.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:tests/data/bam_mapping_2.bam.bai"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ <pbds:DataSets>
+ <pbds:DataSet CreatedAt="2015-05-13T10:58:26" UniqueId="c5402d06-4643-057c-e300-fe229b4e8909" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource ResourceId="file:tests/data/bam_mapping_2.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:tests/data/bam_mapping_2.bam.bai"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ </pbds:DataSet>
+ <pbds:DataSet CreatedAt="2015-05-13T10:58:26" UniqueId="f8b54a55-5fb7-706f-ab35-39afc9c86924" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource ResourceId="file:tests/data/bam_mapping_1.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:tests/data/bam_mapping_1.bam.bai"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ </pbds:DataSet>
+ </pbds:DataSets>
+</pbds:DataSet>
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:BarcodeSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.BarcodeSet" Name="DataSet_BarcodeSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource Name="First Barcodes FASTA" Description="Points to an example Barcodes FASTA file." MetaType="BarcodeFile.BarcodeFastaFile" ResourceId="file:///mnt/path/to/barcode.fasta" Tags="Example"/>
+ </pbbase:ExternalResources>
+ <pbds:DataSetMetadata>
+ <pbds:TotalLength>400</pbds:TotalLength>
+ <pbds:NumRecords>30</pbds:NumRecords>
+ <pbds:BarcodeConstruction>paired</pbds:BarcodeConstruction>
+ </pbds:DataSetMetadata>
+</pbds:BarcodeSet>
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:ConsensusReadSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.ConsensusReadSet" Name="DataSet_ConsensusReadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource Name="First ConsensusRead BAM" Description="Points to an example ConsensusRead BAM file." MetaType="PacBio.ConsensusReadFile.ConsensusReadBamFile" ResourceId="file:///mnt/path/to/ccsreads0.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex MetaType="PacBio.Index.PacBioIndex" ResourceId="file:///mnt/path/to/ccsreads0.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource Name="Second ConsensusRead BAM" Description="Points to another example ConsensusRead BAM file." MetaType="PacBio.ConsensusReadFile.ConsensusReadBamFile" ResourceId="file:///mnt/path/to/ccsreads1.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex MetaType="PacBio.Index.PacBioIndex" ResourceId="file:///mnt/path/to/ccsreads0.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+</pbds:ConsensusReadSet>
--- /dev/null
+<?xml version='1.0' encoding='UTF-8'?>
+<pbds:ReferenceSet CreatedAt="2015-05-28T10:56:36" MetaType="PacBio.DataSet.ReferenceSet" Name="" Tags="" UniqueId="596e87db-34f9-d2fd-c905-b017543170e1" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource ResourceId="file:tests/data/lambda_contigs.fasta"/>
+ </pbbase:ExternalResources>
+</pbds:ReferenceSet>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<SubreadSet
+ CreatedAt="2015-08-19T15:39:36.331"
+ Description="Merged dataset from 1 files using DatasetMerger 0.1.2"
+ MetaType="PacBio.DataSet.HdfSubreadSet"
+ Name="Subreads from runr000013_42267_150403"
+ Tags="pacbio.secondary.instrument=RS"
+ TimeStampedName="hdfsubreadset_2015-08-19T15:39:36.331-07:00"
+ UniqueId="b4741521-2a4c-42df-8a13-0a755ca9ed1e"
+ Version="0.5"
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:ns0="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:ns1="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:ns2="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:ns3="http://pacificbiosciences.com/PacBioReagentKit.xsd">
+ <ns0:ExternalResources>
+ <ns0:ExternalResource
+ MetaType="SubreadFile.SubreadBamFile"
+ TimeStampedName="SubreadFile.SubreadBamFile_00000000000000"
+ UniqueId="251acf71-9eb0-489e-9dd1-cdbd11432753"
+ ResourceId="file:///mnt/secondary-siv/jenkins/jenkins-bot01/workspace/Ubuntu1404_Mainline_SA3_Tiny_tests/software/smrtanalysis/siv/testkit-jobs/sa3_pipelines/mapping/tiny/job_output-ubuntu1404/tasks/pbsmrtpipe.tasks.h5_subreads_to_subread-0//mnt/secondary-siv/jenkins/jenkins-bot01/workspace/Ubuntu1404_Mainline_SA3_Tiny_tests/software/smrtanalysis/siv/testkit-jobs/sa3_pipelines/mapping/tiny/job_output-ubuntu1404/tasks/pbsmrtpipe.tasks.h5_subreads_to_subread-0/file.subreads.subreads.bam" />
+ </ns0:ExternalResources>
+ <DataSetMetadata>
+ <TotalLength>50000000</TotalLength>
+ <NumRecords>150000</NumRecords>
+ <ns2:Collections>
+ <ns2:CollectionMetadata
+ Context="m150404_101626_42267_c100807920800000001823174110291514_s1_p0"
+ InstrumentId="1"
+ InstrumentName="42267"
+ MetaType="PacBio.Collection"
+ TimeStampedName="m150404_101626_42267_c100807920800000001823174110291514_s1_p0"
+ UniqueId="d66c8372-2b70-4dcf-b64f-9f8b5cc351fd">
+ <ns2:InstCtrlVer>2.3.0.1.142990</ns2:InstCtrlVer>
+ <ns2:SigProcVer>NRT@172.31.128.10:8082, SwVer=2301.142990, HwVer=1.0</ns2:SigProcVer>
+ <ns2:RunDetails>
+ <ns2:RunId>r000013_42267_150403</ns2:RunId>
+ <ns2:Name>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:Name>
+ </ns2:RunDetails>
+ <ns2:WellSample Name="Inst42267-040315-SAT-100pM-2kb-P6C4">
+ <ns2:PlateId>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:PlateId>
+ <ns2:WellName>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:WellName>
+ <ns2:Concentration>0.0</ns2:Concentration>
+ <ns2:SampleReuseEnabled>false</ns2:SampleReuseEnabled>
+ <ns2:StageHotstartEnabled>false</ns2:StageHotstartEnabled>
+ <ns2:SizeSelectionEnabled>false</ns2:SizeSelectionEnabled>
+ <ns2:UseCount>1</ns2:UseCount>
+ <ns1:BioSamplePointers>
+ <ns1:BioSamplePointer>251acf71-9eb0-489e-9dd1-cdbd11432752</ns1:BioSamplePointer>
+ </ns1:BioSamplePointers>
+ </ns2:WellSample>
+ <ns2:Automation>
+ <ns0:AutomationParameters>
+ <ns0:AutomationParameter />
+ </ns0:AutomationParameters>
+ </ns2:Automation>
+ <ns2:CollectionNumber>7</ns2:CollectionNumber>
+ <ns2:CellIndex>4</ns2:CellIndex>
+ <ns2:CellPac Barcode="10080792080000000182317411029151" />
+ <ns2:Primary>
+ <ns2:AutomationName>BasecallerV1</ns2:AutomationName>
+ <ns2:ConfigFileName>2-3-0_P6-C4.xml</ns2:ConfigFileName>
+ <ns2:SequencingCondition />
+ <ns2:OutputOptions>
+ <ns2:ResultsFolder>Analysis_Results</ns2:ResultsFolder>
+ <ns2:CollectionPathUri>rsy://mp-rsync/vol55//RS_DATA_STAGING/42267/Inst42267-040315-SAT-100pM-2kb-P6C4_13/A04_7/</ns2:CollectionPathUri>
+ <ns2:CopyFiles>
+ <ns2:CollectionFileCopy>Fasta</ns2:CollectionFileCopy>
+ </ns2:CopyFiles>
+ <ns2:Readout>Bases</ns2:Readout>
+ <ns2:MetricsVerbosity>Minimal</ns2:MetricsVerbosity>
+ </ns2:OutputOptions>
+ </ns2:Primary>
+ </ns2:CollectionMetadata>
+ </ns2:Collections>
+ <ns1:BioSamples>
+ <ns1:BioSample
+ Description="Inst42267-SAT-100pM-2kbLambda-P6C4-Std120_CPS_040315"
+ MetaType="PacBio.Sample"
+ Name="Inst42267-040315-SAT-100pM-2kb-P6C4"
+ TimeStampedName="biosample_2015-08-19T15:39:36.331-07:00"
+ UniqueId="251acf71-9eb0-489e-9dd1-cdbd11432752" />
+ </ns1:BioSamples>
+ </DataSetMetadata>
+</SubreadSet>
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<pbds:DataSet CreatedAt="2015-05-22T16:56:16" MetaType="PacBio.DataSet.DataSet" Name="" Tags="" UniqueId="58e3f7c5-24c1-b58b-fbd5-37de268cc2f0" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource ResourceId="file:tests/data/pbalchemy10kbp.pbalign.sorted.pbver1.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:tests/data/pbalchemy10kbp.pbalign.sorted.pbver1.bam.bai"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ <pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="rname" Value="E.faecalis.1" Operator="=" />
+ </pbbase:Properties>
+ </pbds:Filter>
+ </pbds:Filters>
+</pbds:DataSet>
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:ReferenceSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.ReferenceSet" Name="DataSet_ReferenceSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource Name="First References FASTA" Description="Points to an example references FASTA file." MetaType="PacBio.ReferenceFile.ReferenceFastaFile" ResourceId="file:///mnt/path/to/reference.fasta" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex MetaType="PacBio.Index.SaWriterIndex" ResourceId="file:///mnt/path/to/reference.fasta.sa"/>
+ <pbbase:FileIndex MetaType="PacBio.Index.SamIndex" ResourceId="file:///mnt/path/to/reference.fasta.fai"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ <pbds:DataSetMetadata>
+ <pbds:TotalLength>5000000</pbds:TotalLength>
+ <pbds:NumRecords>500</pbds:NumRecords>
+ <pbds:Organism>Tribble</pbds:Organism>
+ <pbds:Ploidy>Diploid</pbds:Ploidy>
+ <pbds:Contigs>
+ <pbds:Contig Name="gi|229359445|emb|AM181176.4|" Description="Pseudomonas fluorescens SBW25 complete genome|quiver" Length="6722109" Digest="f627c795efad7ce0050ed42b942d408e"/>
+ </pbds:Contigs>
+ </pbds:DataSetMetadata>
+</pbds:ReferenceSet>
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.SubreadSet" Name="DataSet_SubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd" xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd" >
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource Name="First Subreads BAM" Description="Points to an example Subreads BAM file." MetaType="SubreadFile.SubreadBamFile" ResourceId="file:///mnt/path/to/subreads0.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:///mnt/path/to/subreads0.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource Name="Second Subreads BAM" Description="Points to another example Subreads BAM file." MetaType="SubreadFile.SubreadBamFile" ResourceId="file:///mnt/path/to/subreads1.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:///mnt/path/to/subreads0.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ <pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="rq" Value="0.75" Operator=">"/>
+ </pbbase:Properties>
+ </pbds:Filter>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="QNAME" Value="100/0/0_100" Operator="=="/>
+ </pbbase:Properties>
+ </pbds:Filter>
+ </pbds:Filters>
+ <pbds:DataSetMetadata>
+ <pbds:TotalLength>500000</pbds:TotalLength>
+ <pbds:NumRecords>500</pbds:NumRecords>
+ <pbmeta:Collections>
+ <pbmeta:CollectionMetadata Context="m152720_092723_00114_c100480560100000001823075906281381_s1_p0" InstrumentName="RS" InstrumentId="43210">
+ <pbmeta:InstCtrlVer>2.3.0.0.140640</pbmeta:InstCtrlVer>
+ <pbmeta:SigProcVer>NRT@172.31.128.10:8082, SwVer=2300.140640, HwVer=1.0</pbmeta:SigProcVer>
+ <pbmeta:RunDetails>
+ <pbmeta:RunId>e903682f-e502-465c-a2b6-9dd77c9f43fc</pbmeta:RunId>
+ <pbmeta:Name>beta4_130726_biotin_DEV_vs_MFG_PB11K_9458p</pbmeta:Name>
+ </pbmeta:RunDetails>
+ <pbmeta:WellSample Name="Well Sample 1" UniqueId="aaa2df90-d44f-4a48-9f35-3b99473c68f5">
+ <pbmeta:PlateId>2014-12-24_141_NGAT_Igor_bisPNA Enrichment_Mag Bead Elution Buffers</pbmeta:PlateId>
+ <pbmeta:WellName>B01</pbmeta:WellName>
+ <pbmeta:Concentration>10</pbmeta:Concentration>
+ <pbmeta:SampleReuseEnabled>true</pbmeta:SampleReuseEnabled>
+ <pbmeta:StageHotstartEnabled>true</pbmeta:StageHotstartEnabled>
+ <pbmeta:SizeSelectionEnabled>true</pbmeta:SizeSelectionEnabled>
+ <pbmeta:UseCount>0</pbmeta:UseCount>
+ <pbmeta:Comments>Lorem ipsum</pbmeta:Comments>
+ <pbsample:BioSamplePointers>
+ <pbsample:BioSamplePointer>abc2df90-d44f-4a48-9f35-3b99473c68f5</pbsample:BioSamplePointer>
+ </pbsample:BioSamplePointers>
+ </pbmeta:WellSample>
+ <pbmeta:Automation>
+ <pbbase:AutomationParameters>
+ <pbbase:AutomationParameter/>
+ </pbbase:AutomationParameters>
+ </pbmeta:Automation>
+ <pbmeta:CollectionNumber>0</pbmeta:CollectionNumber>
+ <pbmeta:CellIndex>0</pbmeta:CellIndex>
+ <pbmeta:CellPac Barcode="100480560100000001823075906281381"/>
+ <pbmeta:Primary>
+ <pbmeta:AutomationName>BasecallerV1</pbmeta:AutomationName>
+ <pbmeta:ConfigFileName>1-3-0_Standard_C2.xml</pbmeta:ConfigFileName>
+ <pbmeta:SequencingCondition/>
+ <pbmeta:ResultsFolder>Analysis_Results</pbmeta:ResultsFolder>
+ <pbmeta:CollectionPathUri>rsy://mp-rsync/vol56//RS_DATA_STAGING//2014-12-24_141_NGAT_Igor_bisPNA%20Enrichment_Mag%20Bead%20Elution%20Buffers_1094/B01_1</pbmeta:CollectionPathUri>
+ <pbmeta:CopyFiles>
+ <pbmeta:CollectionFileCopy>Bam</pbmeta:CollectionFileCopy>
+ </pbmeta:CopyFiles>
+ </pbmeta:Primary>
+ </pbmeta:CollectionMetadata>
+ </pbmeta:Collections>
+ <pbsample:BioSamples>
+ <pbsample:BioSample UniqueId="abc2df90-d44f-4a48-9f35-3b99473c68f5" Name="consectetur purus" Description="Risus sit amet lectus vehicula vulputate quisque porta accumsan venenatis." CreatedAt="2015-01-20T13:27:23.9271737-08:00"/>
+ </pbsample:BioSamples>
+ </pbds:DataSetMetadata>
+</pbds:SubreadSet>
+<!-- TODO what do internal references look like?-->
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.SubreadSet" Name="DataSet_SubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd" xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd" >
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource Name="First Subreads BAM" Description="Points to an example Subreads BAM file." MetaType="SubreadFile.SubreadBamFile" ResourceId="file:///mnt/path/to/subreads2.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:///mnt/path/to/subreads2.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource Name="Second Subreads BAM" Description="Points to another example Subreads BAM file." MetaType="SubreadFile.SubreadBamFile" ResourceId="file:///mnt/path/to/subreads3.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:///mnt/path/to/subreads3.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ <pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="rq" Value="0.75" Operator=">"/>
+ </pbbase:Properties>
+ </pbds:Filter>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="QNAME" Value="100/0/0_100" Operator="=="/>
+ </pbbase:Properties>
+ </pbds:Filter>
+ </pbds:Filters>
+ <pbds:DataSetMetadata>
+ <pbds:TotalLength>500000</pbds:TotalLength>
+ <pbds:NumRecords>500</pbds:NumRecords>
+ <pbmeta:Collections>
+ <pbmeta:CollectionMetadata Context="m152720_092723_00114_c100480560100000001823075906281381_s1_p0" InstrumentName="RS" InstrumentId="43210">
+ <pbmeta:InstCtrlVer>2.3.0.0.140640</pbmeta:InstCtrlVer>
+ <pbmeta:SigProcVer>NRT@172.31.128.10:8082, SwVer=2300.140640, HwVer=1.0</pbmeta:SigProcVer>
+ <pbmeta:RunDetails>
+ <pbmeta:RunId>e903682f-e502-465c-a2b6-9dd77c9f43fc</pbmeta:RunId>
+ <pbmeta:Name>beta4_130726_biotin_DEV_vs_MFG_PB11K_9458p</pbmeta:Name>
+ </pbmeta:RunDetails>
+ <pbmeta:WellSample Name="Well Sample 1" UniqueId="aaa2df90-d44f-4a48-9f35-3b99473c68f5">
+ <pbmeta:PlateId>2014-12-24_141_NGAT_Igor_bisPNA Enrichment_Mag Bead Elution Buffers</pbmeta:PlateId>
+ <pbmeta:WellName>B01</pbmeta:WellName>
+ <pbmeta:Concentration>10</pbmeta:Concentration>
+ <pbmeta:SampleReuseEnabled>true</pbmeta:SampleReuseEnabled>
+ <pbmeta:StageHotstartEnabled>true</pbmeta:StageHotstartEnabled>
+ <pbmeta:SizeSelectionEnabled>true</pbmeta:SizeSelectionEnabled>
+ <pbmeta:UseCount>0</pbmeta:UseCount>
+ <pbmeta:Comments>Lorem ipsum</pbmeta:Comments>
+ <pbsample:BioSamplePointers>
+ <pbsample:BioSamplePointer>abc2df90-d44f-4a48-9f35-3b99473c68f5</pbsample:BioSamplePointer>
+ </pbsample:BioSamplePointers>
+ </pbmeta:WellSample>
+ <pbmeta:Automation>
+ <pbbase:AutomationParameters>
+ <pbbase:AutomationParameter/>
+ </pbbase:AutomationParameters>
+ </pbmeta:Automation>
+ <pbmeta:CollectionNumber>0</pbmeta:CollectionNumber>
+ <pbmeta:CellIndex>0</pbmeta:CellIndex>
+ <pbmeta:CellPac Barcode="100480560100000001823075906281381"/>
+ <pbmeta:Primary>
+ <pbmeta:AutomationName>BasecallerV1</pbmeta:AutomationName>
+ <pbmeta:ConfigFileName>1-3-0_Standard_C2.xml</pbmeta:ConfigFileName>
+ <pbmeta:SequencingCondition/>
+ <pbmeta:ResultsFolder>Analysis_Results</pbmeta:ResultsFolder>
+ <pbmeta:CollectionPathUri>rsy://mp-rsync/vol56//RS_DATA_STAGING//2014-12-24_141_NGAT_Igor_bisPNA%20Enrichment_Mag%20Bead%20Elution%20Buffers_1094/B01_1</pbmeta:CollectionPathUri>
+ <pbmeta:CopyFiles>
+ <pbmeta:CollectionFileCopy>Bam</pbmeta:CollectionFileCopy>
+ </pbmeta:CopyFiles>
+ </pbmeta:Primary>
+ </pbmeta:CollectionMetadata>
+ </pbmeta:Collections>
+ <pbsample:BioSamples>
+ <pbsample:BioSample UniqueId="abc2df90-d44f-4a48-9f35-3b99473c68f5" Name="consectetur purus" Description="Risus sit amet lectus vehicula vulputate quisque porta accumsan venenatis." CreatedAt="2015-01-20T13:27:23.9271737-08:00"/>
+ </pbsample:BioSamples>
+ </pbds:DataSetMetadata>
+</pbds:SubreadSet>
+<!-- TODO what do internal references look like?-->
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet CreatedAt="2015-01-27T09:00:01" MetaType="PacBio.DataSet.SubreadSet" Name="DataSet_SubreadSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd" xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd" xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd" xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd" >
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource Name="First Subreads BAM" Description="Points to an example Subreads BAM file." MetaType="SubreadFile.SubreadBamFile" ResourceId="file:///mnt/path/to/subreads2.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:///mnt/path/to/subreads2.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource Name="Second Subreads BAM" Description="Points to another example Subreads BAM file." MetaType="SubreadFile.SubreadBamFile" ResourceId="file:///mnt/path/to/subreads3.bam" Tags="Example">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex ResourceId="file:///mnt/path/to/subreads3.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ <pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="rq" Value="0.85" Operator=">"/>
+ </pbbase:Properties>
+ </pbds:Filter>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="QNAME" Value="100/0/0_100" Operator="=="/>
+ </pbbase:Properties>
+ </pbds:Filter>
+ </pbds:Filters>
+ <pbds:DataSetMetadata>
+ <pbds:TotalLength>500000</pbds:TotalLength>
+ <pbds:NumRecords>500</pbds:NumRecords>
+ <pbmeta:Collections>
+ <pbmeta:CollectionMetadata Context="m152720_092723_00114_c100480560100000001823075906281381_s1_p0" InstrumentName="RS" InstrumentId="43210">
+ <pbmeta:InstCtrlVer>2.3.0.0.140640</pbmeta:InstCtrlVer>
+ <pbmeta:SigProcVer>NRT@172.31.128.10:8082, SwVer=2300.140640, HwVer=1.0</pbmeta:SigProcVer>
+ <pbmeta:RunDetails>
+ <pbmeta:RunId>e903682f-e502-465c-a2b6-9dd77c9f43fc</pbmeta:RunId>
+ <pbmeta:Name>beta4_130726_biotin_DEV_vs_MFG_PB11K_9458p</pbmeta:Name>
+ </pbmeta:RunDetails>
+ <pbmeta:WellSample Name="Well Sample 1" UniqueId="aaa2df90-d44f-4a48-9f35-3b99473c68f5">
+ <pbmeta:PlateId>2014-12-24_141_NGAT_Igor_bisPNA Enrichment_Mag Bead Elution Buffers</pbmeta:PlateId>
+ <pbmeta:WellName>B01</pbmeta:WellName>
+ <pbmeta:Concentration>10</pbmeta:Concentration>
+ <pbmeta:SampleReuseEnabled>true</pbmeta:SampleReuseEnabled>
+ <pbmeta:StageHotstartEnabled>true</pbmeta:StageHotstartEnabled>
+ <pbmeta:SizeSelectionEnabled>true</pbmeta:SizeSelectionEnabled>
+ <pbmeta:UseCount>0</pbmeta:UseCount>
+ <pbmeta:Comments>Lorem ipsum</pbmeta:Comments>
+ <pbsample:BioSamplePointers>
+ <pbsample:BioSamplePointer>abc2df90-d44f-4a48-9f35-3b99473c68f5</pbsample:BioSamplePointer>
+ </pbsample:BioSamplePointers>
+ </pbmeta:WellSample>
+ <pbmeta:Automation>
+ <pbbase:AutomationParameters>
+ <pbbase:AutomationParameter/>
+ </pbbase:AutomationParameters>
+ </pbmeta:Automation>
+ <pbmeta:CollectionNumber>0</pbmeta:CollectionNumber>
+ <pbmeta:CellIndex>0</pbmeta:CellIndex>
+ <pbmeta:CellPac Barcode="100480560100000001823075906281381"/>
+ <pbmeta:Primary>
+ <pbmeta:AutomationName>BasecallerV1</pbmeta:AutomationName>
+ <pbmeta:ConfigFileName>1-3-0_Standard_C2.xml</pbmeta:ConfigFileName>
+ <pbmeta:SequencingCondition/>
+ <pbmeta:ResultsFolder>Analysis_Results</pbmeta:ResultsFolder>
+ <pbmeta:CollectionPathUri>rsy://mp-rsync/vol56//RS_DATA_STAGING//2014-12-24_141_NGAT_Igor_bisPNA%20Enrichment_Mag%20Bead%20Elution%20Buffers_1094/B01_1</pbmeta:CollectionPathUri>
+ <pbmeta:CopyFiles>
+ <pbmeta:CollectionFileCopy>Bam</pbmeta:CollectionFileCopy>
+ </pbmeta:CopyFiles>
+ </pbmeta:Primary>
+ </pbmeta:CollectionMetadata>
+ </pbmeta:Collections>
+ <pbsample:BioSamples>
+ <pbsample:BioSample UniqueId="abc2df90-d44f-4a48-9f35-3b99473c68f5" Name="consectetur purus" Description="Risus sit amet lectus vehicula vulputate quisque porta accumsan venenatis." CreatedAt="2015-01-20T13:27:23.9271737-08:00"/>
+ </pbsample:BioSamples>
+ </pbds:DataSetMetadata>
+</pbds:SubreadSet>
+<!-- TODO what do internal references look like?-->
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<pbds:HdfSubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:bax="http://whatever"
+ xmlns:fn="http://www.w3.org/2005/xpath-functions"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:uuid="java:java.util.UUID"
+ xmlns:xs="http://www.w3.org/2001/XMLSchema"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ Name="Subreads from run r001173_42129_130607"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Tags="pacbio.secondary.instrument=RS"
+ Version="0.5"
+ UniqueId="abbc9183-b01e-4671-8c12-19efee534647">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource MetaType="PacBio.SubreadFile.BaxFile"
+ ResourceId="file:///mnt/secondary-siv/testdata/LIMS/2590727/0001/Analysis_Results/m130608_033634_42129_c100515232550000001823076608221351_s1_p0.0.bax.h5"/>
+ <pbbase:ExternalResource MetaType="PacBio.SubreadFile.BaxFile"
+ ResourceId="file:///mnt/secondary-siv/testdata/LIMS/2590727/0001/Analysis_Results/m130608_033634_42129_c100515232550000001823076608221351_s1_p0.1.bax.h5"/>
+ <pbbase:ExternalResource MetaType="PacBio.SubreadFile.BaxFile"
+ ResourceId="file:///mnt/secondary-siv/testdata/LIMS/2590727/0001/Analysis_Results/m130608_033634_42129_c100515232550000001823076608221351_s1_p0.2.bax.h5"/>
+ </pbbase:ExternalResources>
+ <pbds:DataSetMetadata>
+ <pbds:TotalLength>50000000</pbds:TotalLength>
+ <pbds:NumRecords>150000</pbds:NumRecords>
+ <pbmeta:Collections>
+ <pbmeta:CollectionMetadata Context="m130608_033634_42129_c100515232550000001823076608221351_s1_p0"
+ InstrumentName="42129"
+ InstrumentId="1">
+ <pbmeta:InstCtrlVer>2.0.1.0.124174</pbmeta:InstCtrlVer>
+ <pbmeta:SigProcVer>NRT@172.31.128.10:8082, SwVer=2010.124174, HwVer=1.0</pbmeta:SigProcVer>
+ <pbmeta:RunDetails>
+ <pbmeta:RunId>r001173_42129_130607</pbmeta:RunId>
+ <pbmeta:Name>2013-06-07_42129_10kb_Ecoli_201-validation_2</pbmeta:Name>
+ </pbmeta:RunDetails>
+ <pbmeta:WellSample Name="P4-C2_Ecoli_10kb_MBS_stageHS">
+ <pbmeta:PlateId>2013-06-07_42129_10kb_Ecoli_201-validation_2</pbmeta:PlateId>
+ <pbmeta:WellName>P4-C2_Ecoli_10kb_MBS_stageHS</pbmeta:WellName>
+ <pbmeta:Concentration>0</pbmeta:Concentration>
+ <pbmeta:SampleReuseEnabled>false</pbmeta:SampleReuseEnabled>
+ <pbmeta:StageHotstartEnabled>true</pbmeta:StageHotstartEnabled>
+ <pbmeta:SizeSelectionEnabled>
+ false
+ </pbmeta:SizeSelectionEnabled>
+ <pbmeta:UseCount>1</pbmeta:UseCount>
+ <pbmeta:Comments>P4-C2_Ecoli_10kb_MBS_stageHS</pbmeta:Comments>
+ <pbsample:BioSamplePointers>
+ <pbsample:BioSamplePointer>abafd4ed-5cf7-4b83-a869-1a5d239d30e2</pbsample:BioSamplePointer>
+ </pbsample:BioSamplePointers>
+ </pbmeta:WellSample>
+ <pbmeta:AutomationName>MagBead Standard Seq v2</pbmeta:AutomationName>
+ <pbmeta:CollectionNumber>2</pbmeta:CollectionNumber>
+ <pbmeta:CellIndex>1</pbmeta:CellIndex>
+ <pbmeta:CellPac Barcode="10051523255000000182307660822135"/>
+ <pbmeta:Primary>
+ <pbmeta:AutomationName>BasecallerV1</pbmeta:AutomationName>
+ <pbmeta:ConfigFileName>2-0-0_P4-C2.xml</pbmeta:ConfigFileName>
+ <pbmeta:SequencingCondition/>
+ <pbmeta:ResultsFolder>Analysis_Results</pbmeta:ResultsFolder>
+ <pbmeta:CollectionPathUri>rsy://mp-f030-io/vol54//RS_DATA_STAGING/42129/2013-06-07_42129_10kb_Ecoli_201-validation_2_1173/A01_2/</pbmeta:CollectionPathUri>
+ <pbmeta:CopyFiles>
+ <pbmeta:CollectionFileCopy>Fasta</pbmeta:CollectionFileCopy>
+ </pbmeta:CopyFiles>
+ </pbmeta:Primary>
+ </pbmeta:CollectionMetadata>
+ </pbmeta:Collections>
+ <pbsample:BioSamples>
+ <pbsample:BioSample Name="P4-C2_Ecoli_10kb_MBS_stageHS" Description="P4-C2_Ecoli_10kb_MBS_stageHS"
+ UniqueId="abafd4ed-5cf7-4b83-a869-1a5d239d30e2"/>
+ </pbsample:BioSamples>
+ </pbds:DataSetMetadata>
+</pbds:HdfSubreadSet>
--- /dev/null
+@PacBioBAM_TestsDir@/data/group/test1.bam
+@PacBioBAM_TestsDir@/data/group/test2.bam
+@PacBioBAM_TestsDir@/data/group/test3.bam
--- /dev/null
+>lambda_NEB3011
+GGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGCGTTTCCGTTCTTCTTCGTCATAACTTA
+ATGTTTTTATTTAAAATACCCTCTGAAAAGAAAGGAAACGACAGGTGCTGAAAGCGAGCTTTTTGGCCTCTGTCGTTTCC
+TTTCTCTGTTTTTGTCCGTGGAATGAACAATGGAAGTCAACAAAAAGCAGCTGGCTGACATTTTCGGTGCGAGTATCCGT
+ACCATTCAGAACTGGCAGGAACAGGGAATGCCCGTTCTGCGAGGCGGTGGCAAGGGTAATGAGGTGCTTTATGACTCTGC
+CGCCGTCATAAAATGGTATGCCGAAAGGGATGCTGAAATTGAGAACGAAAAGCTGCGCCGGGAGGTTGAAGAACTGCGGC
+AGGCCAGCGAGGCAGATCTCCAGCCAGGAACTATTGAGTACGAACGCCATCGACTTACGCGTGCGCAGGCCGACGCACAG
+GAACTGAAGAATGCCAGAGACTCCGCTGAAGTGGTGGAAACCGCATTCTGTACTTTCGTGCTGTCGCGGATCGCAGGTGA
+AATTGCCAGTATTCTCGACGGGCTCCCCCTGTCGGTGCAGCGGCGTTTTCCGGAACTGGAAAACCGACATGTTGATTTCC
+TGAAACGGGATATCATCAAAGCCATGAACAAAGCAGCCGCGCTGGATGAACTGATACCGGGGTTGCTGAGTGAATATATC
+GAACAGTCAGGTTAACAGGCTGCGGCATTTTGTCCGCGCCGGGCTTCGCTCACTGTTCAGGCCGGAGCCACAGACCGCCG
+TTGAATGGGCGGATGCTAATTACTATCTCCCGAAAGAATCCGCATACCAGGAAGGGCGCTGGGAAACACTGCCCTTTCAG
+CGGGCCATCATGAATGCGATGGGCAGCGACTACATCCGTGAGGTGAATGTGGTGAAGTCTGCCCGTGTCGGTTATTCCAA
+AATGCTGCTGGGTGTTTATGCCTACTTTATAGAGCATAAGCAGCGCAACACCCTTATCTGGTTGCCGACGGATGGTGATG
+CCGAGAACTTTATGAAAACCCACGTTGAGCCGACTATTCGTGATATTCCGTCGCTGCTGGCGCTGGCCCCGTGGTATGGC
+AAAAAGCACCGGGATAACACGCTCACCATGAAGCGTTTCACTAATGGGCGTGGCTTCTGGTGCCTGGGCGGTAAAGCGGC
+AAAAAACTACCGTGAAAAGTCGGTGGATGTGGCGGGTTATGATGAACTTGCTGCTTTTGATGATGATATTGAACAGGAAG
+GCTCTCCGACGTTCCTGGGTGACAAGCGTATTGAAGGCTCGGTCTGGCCAAAGTCCATCCGTGGCTCCACGCCAAAAGTG
+AGAGGCACCTGTCAGATTGAGCGTGCAGCCAGTGAATCCCCGCATTTTATGCGTTTTCATGTTGCCTGCCCGCATTGCGG
+GGAGGAGCAGTATCTTAAATTTGGCGACAAAGAGACGCCGTTTGGCCTCAAATGGACGCCGGATGACCCCTCCAGCGTGT
+TTTATCTCTGCGAGCATAATGCCTGCGTCATCCGCCAGCAGGAGCTGGACTTTACTGATGCCCGTTATATCTGCGAAAAG
+ACCGGGATCTGGACCCGTGATGGCATTCTCTGGTTTTCGTCATCCGGTGAAGAGATTGAGCCACCTGACAGTGTGACCTT
+TCACATCTGGACAGCGTACAGCCCGTTCACCACCTGGGTGCAGATTGTCAAAGACTGGATGAAAACGAAAGGGGATACGG
+GAAAACGTAAAACCTTCGTAAACACCACGCTCGGTGAGACGTGGGAGGCGAAAATTGGCGAACGTCCGGATGCTGAAGTG
+ATGGCAGAGCGGAAAGAGCATTATTCAGCGCCCGTTCCTGACCGTGTGGCTTACCTGACCGCCGGTATCGACTCCCAGCT
+GGACCGCTACGAAATGCGCGTATGGGGATGGGGGCCGGGTGAGGAAAGCTGGCTGATTGACCGGCAGATTATTATGGGCC
+GCCACGACGATGAACAGACGCTGCTGCGTGTGGATGAGGCCATCAATAAAACCTATACCCGCCGGAATGGTGCAGAAATG
+TCGATATCCCGTATCTGCTGGGATACTGGCGGGATTGACCCGACCATTGTGTATGAACGCTCGAAAAAACATGGGCTGTT
+CCGGGTGATCCCCATTAAAGGGGCATCCGTCTACGGAAAGCCGGTGGCCAGCATGCCACGTAAGCGAAACAAAAACGGGG
+TTTACCTTACCGAAATCGGTACGGATACCGCGAAAGAGCAGATTTATAACCGCTTCACACTGACGCCGGAAGGGGATGAA
+CCGCTTCCCGGTGCCGTTCACTTCCCGAATAACCCGGATATTTTTGATCTGACCGAAGCGCAGCAGCTGACTGCTGAAGA
+GCAGGTCGAAAAATGGGTGGATGGCAGGAAAAAAATACTGTGGGACAGCAAAAAGCGACGCAATGAGGCACTCGACTGCT
+TCGTTTATGCGCTGGCGGCGCTGCGCATCAGTATTTCCCGCTGGCAGCTGGATCTCAGTGCGCTGCTGGCGAGCCTGCAG
+GAAGAGGATGGTGCAGCAACCAACAAGAAAACACTGGCAGATTACGCCCGTGCCTTATCCGGAGAGGATGAATGACGCGA
+CAGGAAGAACTTGCCGCTGCCCGTGCGGCACTGCATGACCTGATGACAGGTAAACGGGTGGCAACAGTACAGAAAGACGG
+ACGAAGGGTGGAGTTTACGGCCACTTCCGTGTCTGACCTGAAAAAATATATTGCAGAGCTGGAAGTGCAGACCGGCATGA
+CACAGCGACGCAGGGGACCTGCAGGATTTTATGTATGAAAACGCCCACCATTCCCACCCTTCTGGGGCCGGACGGCATGA
+CATCGCTGCGCGAATATGCCGGTTATCACGGCGGTGGCAGCGGATTTGGAGGGCAGTTGCGGTCGTGGAACCCACCGAGT
+GAAAGTGTGGATGCAGCCCTGTTGCCCAACTTTACCCGTGGCAATGCCCGCGCAGACGATCTGGTACGCAATAACGGCTA
+TGCCGCCAACGCCATCCAGCTGCATCAGGATCATATCGTCGGGTCTTTTTTCCGGCTCAGTCATCGCCCAAGCTGGCGCT
+ATCTGGGCATCGGGGAGGAAGAAGCCCGTGCCTTTTCCCGCGAGGTTGAAGCGGCATGGAAAGAGTTTGCCGAGGATGAC
+TGCTGCTGCATTGACGTTGAGCGAAAACGCACGTTTACCATGATGATTCGGGAAGGTGTGGCCATGCACGCCTTTAACGG
+TGAACTGTTCGTTCAGGCCACCTGGGATACCAGTTCGTCGCGGCTTTTCCGGACACAGTTCCGGATGGTCAGCCCGAAGC
+GCATCAGCAACCCGAACAATACCGGCGACAGCCGGAACTGCCGTGCCGGTGTGCAGATTAATGACAGCGGTGCGGCGCTG
+GGATATTACGTCAGCGAGGACGGGTATCCTGGCTGGATGCCGCAGAAATGGACATGGATACCCCGTGAGTTACCCGGCGG
+GCGCGCCTCGTTCATTCACGTTTTTGAACCCGTGGAGGACGGGCAGACTCGCGGTGCAAATGTGTTTTACAGCGTGATGG
+AGCAGATGAAGATGCTCGACACGCTGCAGAACACGCAGCTGCAGAGCGCCATTGTGAAGGCGATGTATGCCGCCACCATT
+GAGAGTGAGCTGGATACGCAGTCAGCGATGGATTTTATTCTGGGCGCGAACAGTCAGGAGCAGCGGGAAAGGCTGACCGG
+CTGGATTGGTGAAATTGCCGCGTATTACGCCGCAGCGCCGGTCCGGCTGGGAGGCGCAAAAGTACCGCACCTGATGCCGG
+GTGACTCACTGAACCTGCAGACGGCTCAGGATACGGATAACGGCTACTCCGTGTTTGAGCAGTCACTGCTGCGGTATATC
+GCTGCCGGGCTGGGTGTCTCGTATGAGCAGCTTTCCCGGAATTACGCCCAGATGAGCTACTCCACGGCACGGGCCAGTGC
+GAACGAGTCGTGGGCGTACTTTATGGGGCGGCGAAAATTCGTCGCATCCCGTCAGGCGAGCCAGATGTTTCTGTGCTGGC
+TGGAAGAGGCCATCGTTCGCCGCGTGGTGACGTTACCTTCAAAAGCGCGCTTCAGTTTTCAGGAAGCCCGCAGTGCCTGG
+GGGAACTGCGACTGGATAGGCTCCGGTCGTATGGCCATCGATGGTCTGAAAGAAGTTCAGGAAGCGGTGATGCTGATAGA
+AGCCGGACTGAGTACCTACGAGAAAGAGTGCGCAAAACGCGGTGACGACTATCAGGAAATTTTTGCCCAGCAGGTCCGTG
+AAACGATGGAGCGCCGTGCAGCCGGTCTTAAACCGCCCGCCTGGGCGGCTGCAGCATTTGAATCCGGGCTGCGACAATCA
+ACAGAGGAGGAGAAGAGTGACAGCAGAGCTGCGTAATCTCCCGCATATTGCCAGCATGGCCTTTAATGAGCCGCTGATGC
+TTGAACCCGCCTATGCGCGGGTTTTCTTTTGTGCGCTTGCAGGCCAGCTTGGGATCAGCAGCCTGACGGATGCGGTGTCC
+GGCGACAGCCTGACTGCCCAGGAGGCACTCGCGACGCTGGCATTATCCGGTGATGATGACGGACCACGACAGGCCCGCAG
+TTATCAGGTCATGAACGGCATCGCCGTGCTGCCGGTGTCCGGCACGCTGGTCAGCCGGACGCGGGCGCTGCAGCCGTACT
+CGGGGATGACCGGTTACAACGGCATTATCGCCCGTCTGCAACAGGCTGCCAGCGATCCGATGGTGGACGGCATTCTGCTC
+GATATGGACACGCCCGGCGGGATGGTGGCGGGGGCATTTGACTGCGCTGACATCATCGCCCGTGTGCGTGACATAAAACC
+GGTATGGGCGCTTGCCAACGACATGAACTGCAGTGCAGGTCAGTTGCTTGCCAGTGCCGCCTCCCGGCGTCTGGTCACGC
+AGACCGCCCGGACAGGCTCCATCGGCGTCATGATGGCTCACAGTAATTACGGTGCTGCGCTGGAGAAACAGGGTGTGGAA
+ATCACGCTGATTTACAGCGGCAGCCATAAGGTGGATGGCAACCCCTACAGCCATCTTCCGGATGACGTCCGGGAGACACT
+GCAGTCCCGGATGGACGCAACCCGCCAGATGTTTGCGCAGAAGGTGTCGGCATATACCGGCCTGTCCGTGCAGGTTGTGC
+TGGATACCGAGGCTGCAGTGTACAGCGGTCAGGAGGCCATTGATGCCGGACTGGCTGATGAACTTGTTAACAGCACCGAT
+GCGATCACCGTCATGCGTGATGCACTGGATGCACGTAAATCCCGTCTCTCAGGAGGGCGAATGACCAAAGAGACTCAATC
+AACAACTGTTTCAGCCACTGCTTCGCAGGCTGACGTTACTGACGTGGTGCCAGCGACGGAGGGCGAGAACGCCAGCGCGG
+CGCAGCCGGACGTGAACGCGCAGATCACCGCAGCGGTTGCGGCAGAAAACAGCCGCATTATGGGGATCCTCAACTGTGAG
+GAGGCTCACGGACGCGAAGAACAGGCACGCGTGCTGGCAGAAACCCCCGGTATGACCGTGAAAACGGCCCGCCGCATTCT
+GGCCGCAGCACCACAGAGTGCACAGGCGCGCAGTGACACTGCGCTGGATCGTCTGATGCAGGGGGCACCGGCACCGCTGG
+CTGCAGGTAACCCGGCATCTGATGCCGTTAACGATTTGCTGAACACACCAGTGTAAGGGATGTTTATGACGAGCAAAGAA
+ACCTTTACCCATTACCAGCCGCAGGGCAACAGTGACCCGGCTCATACCGCAACCGCGCCCGGCGGATTGAGTGCGAAAGC
+GCCTGCAATGACCCCGCTGATGCTGGACACCTCCAGCCGTAAGCTGGTTGCGTGGGATGGCACCACCGACGGTGCTGCCG
+TTGGCATTCTTGCGGTTGCTGCTGACCAGACCAGCACCACGCTGACGTTCTACAAGTCCGGCACGTTCCGTTATGAGGAT
+GTGCTCTGGCCGGAGGCTGCCAGCGACGAGACGAAAAAACGGACCGCGTTTGCCGGAACGGCAATCAGCATCGTTTAACT
+TTACCCTTCATCACTAAAGGCCGCCTGTGCGGCTTTTTTTACGGGATTTTTTTATGTCGATGTACACAACCGCCCAACTG
+CTGGCGGCAAATGAGCAGAAATTTAAGTTTGATCCGCTGTTTCTGCGTCTCTTTTTCCGTGAGAGCTATCCCTTCACCAC
+GGAGAAAGTCTATCTCTCACAAATTCCGGGACTGGTAAACATGGCGCTGTACGTTTCGCCGATTGTTTCCGGTGAGGTTA
+TCCGTTCCCGTGGCGGCTCCACCTCTGAATTTACGCCGGGATATGTCAAGCCGAAGCATGAAGTGAATCCGCAGATGACC
+CTGCGTCGCCTGCCGGATGAAGATCCGCAGAATCTGGCGGACCCGGCTTACCGCCGCCGTCGCATCATCATGCAGAACAT
+GCGTGACGAAGAGCTGGCCATTGCTCAGGTCGAAGAGATGCAGGCAGTTTCTGCCGTGCTTAAGGGCAAATACACCATGA
+CCGGTGAAGCCTTCGATCCGGTTGAGGTGGATATGGGCCGCAGTGAGGAGAATAACATCACGCAGTCCGGCGGCACGGAG
+TGGAGCAAGCGTGACAAGTCCACGTATGACCCGACCGACGATATCGAAGCCTACGCGCTGAACGCCAGCGGTGTGGTGAA
+TATCATCGTGTTCGATCCGAAAGGCTGGGCGCTGTTCCGTTCCTTCAAAGCCGTCAAGGAGAAGCTGGATACCCGTCGTG
+GCTCTAATTCCGAGCTGGAGACAGCGGTGAAAGACCTGGGCAAAGCGGTGTCCTATAAGGGGATGTATGGCGATGTGGCC
+ATCGTCGTGTATTCCGGACAGTACGTGGAAAACGGCGTCAAAAAGAACTTCCTGCCGGACAACACGATGGTGCTGGGGAA
+CACTCAGGCACGCGGTCTGCGCACCTATGGCTGCATTCAGGATGCGGACGCACAGCGCGAAGGCATTAACGCCTCTGCCC
+GTTACCCGAAAAACTGGGTGACCACCGGCGATCCGGCGCGTGAGTTCACCATGATTCAGTCAGCACCGCTGATGCTGCTG
+GCTGACCCTGATGAGTTCGTGTCCGTACAACTGGCGTAATCATGGCCCTTCGGGGCCATTGTTTCTCTGTGGAGGAGTCC
+ATGACGAAAGATGAACTGATTGCCCGTCTCCGCTCGCTGGGTGAACAACTGAACCGTGATGTCAGCCTGACGGGGACGAA
+AGAAGAACTGGCGCTCCGTGTGGCAGAGCTGAAAGAGGAGCTTGATGACACGGATGAAACTGCCGGTCAGGACACCCCTC
+TCAGCCGGGAAAATGTGCTGACCGGACATGAAAATGAGGTGGGATCAGCGCAGCCGGATACCGTGATTCTGGATACGTCT
+GAACTGGTCACGGTCGTGGCACTGGTGAAGCTGCATACTGATGCACTTCACGCCACGCGGGATGAACCTGTGGCATTTGT
+GCTGCCGGGAACGGCGTTTCGTGTCTCTGCCGGTGTGGCAGCCGAAATGACAGAGCGCGGCCTGGCCAGAATGCAATAAC
+GGGAGGCGCTGTGGCTGATTTCGATAACCTGTTCGATGCTGCCATTGCCCGCGCCGATGAAACGATACGCGGGTACATGG
+GAACGTCAGCCACCATTACATCCGGTGAGCAGTCAGGTGCGGTGATACGTGGTGTTTTTGATGACCCTGAAAATATCAGC
+TATGCCGGACAGGGCGTGCGCGTTGAAGGCTCCAGCCCGTCCCTGTTTGTCCGGACTGATGAGGTGCGGCAGCTGCGGCG
+TGGAGACACGCTGACCATCGGTGAGGAAAATTTCTGGGTAGATCGGGTTTCGCCGGATGATGGCGGAAGTTGTCATCTCT
+GGCTTGGACGGGGCGTACCGCCTGCCGTTAACCGTCGCCGCTGAAAGGGGGATGTATGGCCATAAAAGGTCTTGAGCAGG
+CCGTTGAAAACCTCAGCCGTATCAGCAAAACGGCGGTGCCTGGTGCCGCCGCAATGGCCATTAACCGCGTTGCTTCATCC
+GCGATATCGCAGTCGGCGTCACAGGTTGCCCGTGAGACAAAGGTACGCCGGAAACTGGTAAAGGAAAGGGCCAGGCTGAA
+AAGGGCCACGGTCAAAAATCCGCAGGCCAGAATCAAAGTTAACCGGGGGGATTTGCCCGTAATCAAGCTGGGTAATGCGC
+GGGTTGTCCTTTCGCGCCGCAGGCGTCGTAAAAAGGGGCAGCGTTCATCCCTGAAAGGTGGCGGCAGCGTGCTTGTGGTG
+GGTAACCGTCGTATTCCCGGCGCGTTTATTCAGCAACTGAAAAATGGCCGGTGGCATGTCATGCAGCGTGTGGCTGGGAA
+AAACCGTTACCCCATTGATGTGGTGAAAATCCCGATGGCGGTGCCGCTGACCACGGCGTTTAAACAAAATATTGAGCGGA
+TACGGCGTGAACGTCTTCCGAAAGAGCTGGGCTATGCGCTGCAGCATCAACTGAGGATGGTAATAAAGCGATGAAACATA
+CTGAACTCCGTGCAGCCGTACTGGATGCACTGGAGAAGCATGACACCGGGGCGACGTTTTTTGATGGTCGCCCCGCTGTT
+TTTGATGAGGCGGATTTTCCGGCAGTTGCCGTTTATCTCACCGGCGCTGAATACACGGGCGAAGAGCTGGACAGCGATAC
+CTGGCAGGCGGAGCTGCATATCGAAGTTTTCCTGCCTGCTCAGGTGCCGGATTCAGAGCTGGATGCGTGGATGGAGTCCC
+GGATTTATCCGGTGATGAGCGATATCCCGGCACTGTCAGATTTGATCACCAGTATGGTGGCCAGCGGCTATGACTACCGG
+CGCGACGATGATGCGGGCTTGTGGAGTTCAGCCGATCTGACTTATGTCATTACCTATGAAATGTGAGGACGCTATGCCTG
+TACCAAATCCTACAATGCCGGTGAAAGGTGCCGGGACCACCCTGTGGGTTTATAAGGGGAGCGGTGACCCTTACGCGAAT
+CCGCTTTCAGACGTTGACTGGTCGCGTCTGGCAAAAGTTAAAGACCTGACGCCCGGCGAACTGACCGCTGAGTCCTATGA
+CGACAGCTATCTCGATGATGAAGATGCAGACTGGACTGCGACCGGGCAGGGGCAGAAATCTGCCGGAGATACCAGCTTCA
+CGCTGGCGTGGATGCCCGGAGAGCAGGGGCAGCAGGCGCTGCTGGCGTGGTTTAATGAAGGCGATACCCGTGCCTATAAA
+ATCCGCTTCCCGAACGGCACGGTCGATGTGTTCCGTGGCTGGGTCAGCAGTATCGGTAAGGCGGTGACGGCGAAGGAAGT
+GATCACCCGCACGGTGAAAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGTAACAGCGGCAACCG
+GCATGACCGTGACGCCTGCCAGCACCTCGGTGGTGAAAGGGCAGAGCACCACGCTGACCGTGGCCTTCCAGCCGGAGGGC
+GTAACCGACAAGAGCTTTCGTGCGGTGTCTGCGGATAAAACAAAAGCCACCGTGTCGGTCAGTGGTATGACCATCACCGT
+GAACGGCGTTGCTGCAGGCAAGGTCAACATTCCGGTTGTATCCGGTAATGGTGAGTTTGCTGCGGTTGCAGAAATTACCG
+TCACCGCCAGTTAATCCGGAGAGTCAGCGATGTTCCTGAAAACCGAATCATTTGAACATAACGGTGTGACCGTCACGCTT
+TCTGAACTGTCAGCCCTGCAGCGCATTGAGCATCTCGCCCTGATGAAACGGCAGGCAGAACAGGCGGAGTCAGACAGCAA
+CCGGAAGTTTACTGTGGAAGACGCCATCAGAACCGGCGCGTTTCTGGTGGCGATGTCCCTGTGGCATAACCATCCGCAGA
+AGACGCAGATGCCGTCCATGAATGAAGCCGTTAAACAGATTGAGCAGGAAGTGCTTACCACCTGGCCCACGGAGGCAATT
+TCTCATGCTGAAAACGTGGTGTACCGGCTGTCTGGTATGTATGAGTTTGTGGTGAATAATGCCCCTGAACAGACAGAGGA
+CGCCGGGCCCGCAGAGCCTGTTTCTGCGGGAAAGTGTTCGACGGTGAGCTGAGTTTTGCCCTGAAACTGGCGCGTGAGAT
+GGGGCGACCCGACTGGCGTGCCATGCTTGCCGGGATGTCATCCACGGAGTATGCCGACTGGCACCGCTTTTACAGTACCC
+ATTATTTTCATGATGTTCTGCTGGATATGCACTTTTCCGGGCTGACGTACACCGTGCTCAGCCTGTTTTTCAGCGATCCG
+GATATGCATCCGCTGGATTTCAGTCTGCTGAACCGGCGCGAGGCTGACGAAGAGCCTGAAGATGATGTGCTGATGCAGAA
+AGCGGCAGGGCTTGCCGGAGGTGTCCGCTTTGGCCCGGACGGGAATGAAGTTATCCCCGCTTCCCCGGATGTGGCGGACA
+TGACGGAGGATGACGTAATGCTGATGACAGTATCAGAAGGGATCGCAGGAGGAGTCCGGTATGGCTGAACCGGTAGGCGA
+TCTGGTCGTTGATTTGAGTCTGGATGCGGCCAGATTTGACGAGCAGATGGCCAGAGTCAGGCGTCATTTTTCTGGTACGG
+AAAGTGATGCGAAAAAAACAGCGGCAGTCGTTGAACAGTCGCTGAGCCGACAGGCGCTGGCTGCACAGAAAGCGGGGATT
+TCCGTCGGGCAGTATAAAGCCGCCATGCGTATGCTGCCTGCACAGTTCACCGACGTGGCCACGCAGCTTGCAGGCGGGCA
+AAGTCCGTGGCTGATCCTGCTGCAACAGGGGGGGCAGGTGAAGGACTCCTTCGGCGGGATGATCCCCATGTTCAGGGGGC
+TTGCCGGTGCGATCACCCTGCCGATGGTGGGGGCCACCTCGCTGGCGGTGGCGACCGGTGCGCTGGCGTATGCCTGGTAT
+CAGGGCAACTCAACCCTGTCCGATTTCAACAAAACGCTGGTCCTTTCCGGCAATCAGGCGGGACTGACGGCAGATCGTAT
+GCTGGTCCTGTCCAGAGCCGGGCAGGCGGCAGGGCTGACGTTTAACCAGACCAGCGAGTCACTCAGCGCACTGGTTAAGG
+CGGGGGTAAGCGGTGAGGCTCAGATTGCGTCCATCAGCCAGAGTGTGGCGCGTTTCTCCTCTGCATCCGGCGTGGAGGTG
+GACAAGGTCGCTGAAGCCTTCGGGAAGCTGACCACAGACCCGACGTCGGGGCTGACGGCGATGGCTCGCCAGTTCCATAA
+CGTGTCGGCGGAGCAGATTGCGTATGTTGCTCAGTTGCAGCGTTCCGGCGATGAAGCCGGGGCATTGCAGGCGGCGAACG
+AGGCCGCAACGAAAGGGTTTGATGACCAGACCCGCCGCCTGAAAGAGAACATGGGCACGCTGGAGACCTGGGCAGACAGG
+ACTGCGCGGGCATTCAAATCCATGTGGGATGCGGTGCTGGATATTGGTCGTCCTGATACCGCGCAGGAGATGCTGATTAA
+GGCAGAGGCTGCGTATAAGAAAGCAGACGACATCTGGAATCTGCGCAAGGATGATTATTTTGTTAACGATGAAGCGCGGG
+CGCGTTACTGGGATGATCGTGAAAAGGCCCGTCTTGCGCTTGAAGCCGCCCGAAAGAAGGCTGAGCAGCAGACTCAACAG
+GACAAAAATGCGCAGCAGCAGAGCGATACCGAAGCGTCACGGCTGAAATATACCGAAGAGGCGCAGAAGGCTTACGAACG
+GCTGCAGACGCCGCTGGAGAAATATACCGCCCGTCAGGAAGAACTGAACAAGGCACTGAAAGACGGGAAAATCCTGCAGG
+CGGATTACAACACGCTGATGGCGGCGGCGAAAAAGGATTATGAAGCGACGCTGAAAAAGCCGAAACAGTCCAGCGTGAAG
+GTGTCTGCGGGCGATCGTCAGGAAGACAGTGCTCATGCTGCCCTGCTGACGCTTCAGGCAGAACTCCGGACGCTGGAGAA
+GCATGCCGGAGCAAATGAGAAAATCAGCCAGCAGCGCCGGGATTTGTGGAAGGCGGAGAGTCAGTTCGCGGTACTGGAGG
+AGGCGGCGCAACGTCGCCAGCTGTCTGCACAGGAGAAATCCCTGCTGGCGCATAAAGATGAGACGCTGGAGTACAAACGC
+CAGCTGGCTGCACTTGGCGACAAGGTTACGTATCAGGAGCGCCTGAACGCGCTGGCGCAGCAGGCGGATAAATTCGCACA
+GCAGCAACGGGCAAAACGGGCCGCCATTGATGCGAAAAGCCGGGGGCTGACTGACCGGCAGGCAGAACGGGAAGCCACGG
+AACAGCGCCTGAAGGAACAGTATGGCGATAATCCGCTGGCGCTGAATAACGTCATGTCAGAGCAGAAAAAGACCTGGGCG
+GCTGAAGACCAGCTTCGCGGGAACTGGATGGCAGGCCTGAAGTCCGGCTGGAGTGAGTGGGAAGAGAGCGCCACGGACAG
+TATGTCGCAGGTAAAAAGTGCAGCCACGCAGACCTTTGATGGTATTGCACAGAATATGGCGGCGATGCTGACCGGCAGTG
+AGCAGAACTGGCGCAGCTTCACCCGTTCCGTGCTGTCCATGATGACAGAAATTCTGCTTAAGCAGGCAATGGTGGGGATT
+GTCGGGAGTATCGGCAGCGCCATTGGCGGGGCTGTTGGTGGCGGCGCATCCGCGTCAGGCGGTACAGCCATTCAGGCCGC
+TGCGGCGAAATTCCATTTTGCAACCGGAGGATTTACGGGAACCGGCGGCAAATATGAGCCAGCGGGGATTGTTCACCGTG
+GTGAGTTTGTCTTCACGAAGGAGGCAACCAGCCGGATTGGCGTGGGGAATCTTTACCGGCTGATGCGCGGCTATGCCACC
+GGCGGTTATGTCGGTACACCGGGCAGCATGGCAGACAGCCGGTCGCAGGCGTCCGGGACGTTTGAGCAGAATAACCATGT
+GGTGATTAACAACGACGGCACGAACGGGCAGATAGGTCCGGCTGCTCTGAAGGCGGTGTATGACATGGCCCGCAAGGGTG
+CCCGTGATGAAATTCAGACACAGATGCGTGATGGTGGCCTGTTCTCCGGAGGTGGACGATGAAGACCTTCCGCTGGAAAG
+TGAAACCCGGTATGGATGTGGCTTCGGTCCCTTCTGTAAGAAAGGTGCGCTTTGGTGATGGCTATTCTCAGCGAGCGCCT
+GCCGGGCTGAATGCCAACCTGAAAACGTACAGCGTGACGCTTTCTGTCCCCCGTGAGGAGGCCACGGTACTGGAGTCGTT
+TCTGGAAGAGCACGGGGGCTGGAAATCCTTTCTGTGGACGCCGCCTTATGAGTGGCGGCAGATAAAGGTGACCTGCGCAA
+AATGGTCGTCGCGGGTCAGTATGCTGCGTGTTGAGTTCAGCGCAGAGTTTGAACAGGTGGTGAACTGATGCAGGATATCC
+GGCAGGAAACACTGAATGAATGCACCCGTGCGGAGCAGTCGGCCAGCGTGGTGCTCTGGGAAATCGACCTGACAGAGGTC
+GGTGGAGAACGTTATTTTTTCTGTAATGAGCAGAACGAAAAAGGTGAGCCGGTCACCTGGCAGGGGCGACAGTATCAGCC
+GTATCCCATTCAGGGGAGCGGTTTTGAACTGAATGGCAAAGGCACCAGTACGCGCCCCACGCTGACGGTTTCTAACCTGT
+ACGGTATGGTCACCGGGATGGCGGAAGATATGCAGAGTCTGGTCGGCGGAACGGTGGTCCGGCGTAAGGTTTACGCCCGT
+TTTCTGGATGCGGTGAACTTCGTCAACGGAAACAGTTACGCCGATCCGGAGCAGGAGGTGATCAGCCGCTGGCGCATTGA
+GCAGTGCAGCGAACTGAGCGCGGTGAGTGCCTCCTTTGTACTGTCCACGCCGACGGAAACGGATGGCGCTGTTTTTCCGG
+GACGTATCATGCTGGCCAACACCTGCACCTGGACCTATCGCGGTGACGAGTGCGGTTATAGCGGTCCGGCTGTCGCGGAT
+GAATATGACCAGCCAACGTCCGATATCACGAAGGATAAATGCAGCAAATGCCTGAGCGGTTGTAAGTTCCGCAATAACGT
+CGGCAACTTTGGCGGCTTCCTTTCCATTAACAAACTTTCGCAGTAAATCCCATGACACAGACAGAATCAGCGATTCTGGC
+GCACGCCCGGCGATGTGCGCCAGCGGAGTCGTGCGGCTTCGTGGTAAGCACGCCGGAGGGGGAAAGATATTTCCCCTGCG
+TGAATATCTCCGGTGAGCCGGAGGCGTATTTCCGTATGTCGCCGGAAGACTGGCTGCAGGCAGAAATGCAGGGTGAGATT
+GTGGCGCTGGTCCACAGCCACCCCGGTGGTCTGCCCTGGCTGAGTGAGGCCGACCGGCGGCTGCAGGTGCAGAGTGATTT
+GCCGTGGTGGCTGGTCTGCCGGGGGACGATTCATAAGTTCCGCTGTGTGCCGCATCTCACCGGGCGGCGCTTTGAGCACG
+GTGTGACGGACTGTTACACACTGTTCCGGGATGCTTATCATCTGGCGGGGATTGAGATGCCGGACTTTCATCGTGAGGAT
+GACTGGTGGCGTAACGGCCAGAATCTCTATCTGGATAATCTGGAGGCGACGGGGCTGTATCAGGTGCCGTTGTCAGCGGC
+ACAGCCGGGCGATGTGCTGCTGTGCTGTTTTGGTTCATCAGTGCCGAATCACGCCGCAATTTACTGCGGCGACGGCGAGC
+TGCTGCACCATATTCCTGAACAACTGAGCAAACGAGAGAGGTACACCGACAAATGGCAGCGACGCACACACTCCCTCTGG
+CGTCACCGGGCATGGCGCGCATCTGCCTTTACGGGGATTTACAACGATTTGGTCGCCGCATCGACCTTCGTGTGAAAACG
+GGGGCTGAAGCCATCCGGGCACTGGCCACACAGCTCCCGGCGTTTCGTCAGAAACTGAGCGACGGCTGGTATCAGGTACG
+GATTGCCGGGCGGGACGTCAGCACGTCCGGGTTAACGGCGCAGTTACATGAGACTCTGCCTGATGGCGCTGTAATTCATA
+TTGTTCCCAGAGTCGCCGGGGCCAAGTCAGGTGGCGTATTCCAGATTGTCCTGGGGGCTGCCGCCATTGCCGGATCATTC
+TTTACCGCCGGAGCCACCCTTGCAGCATGGGGGGCAGCCATTGGGGCCGGTGGTATGACCGGCATCCTGTTTTCTCTCGG
+TGCCAGTATGGTGCTCGGTGGTGTGGCGCAGATGCTGGCACCGAAAGCCAGAACTCCCCGTATACAGACAACGGATAACG
+GTAAGCAGAACACCTATTTCTCCTCACTGGATAACATGGTTGCCCAGGGCAATGTTCTGCCTGTTCTGTACGGGGAAATG
+CGCGTGGGGTCACGCGTGGTTTCTCAGGAGATCAGCACGGCAGACGAAGGGGACGGTGGTCAGGTTGTGGTGATTGGTCG
+CTGATGCAAAATGTTTTATGTGAAACCGCCTGCGGGCGGTTTTGTCATTTATGGAGCGTGAGGAATGGGTAAAGGAAGCA
+GTAAGGGGCATACCCCGCGCGAAGCGAAGGACAACCTGAAGTCCACGCAGTTGCTGAGTGTGATCGATGCCATCAGCGAA
+GGGCCGATTGAAGGTCCGGTGGATGGCTTAAAAAGCGTGCTGCTGAACAGTACGCCGGTGCTGGACACTGAGGGGAATAC
+CAACATATCCGGTGTCACGGTGGTGTTCCGGGCTGGTGAGCAGGAGCAGACTCCGCCGGAGGGATTTGAATCCTCCGGCT
+CCGAGACGGTGCTGGGTACGGAAGTGAAATATGACACGCCGATCACCCGCACCATTACGTCTGCAAACATCGACCGTCTG
+CGCTTTACCTTCGGTGTACAGGCACTGGTGGAAACCACCTCAAAGGGTGACAGGAATCCGTCGGAAGTCCGCCTGCTGGT
+TCAGATACAACGTAACGGTGGCTGGGTGACGGAAAAAGACATCACCATTAAGGGCAAAACCACCTCGCAGTATCTGGCCT
+CGGTGGTGATGGGTAACCTGCCGCCGCGCCCGTTTAATATCCGGATGCGCAGGATGACGCCGGACAGCACCACAGACCAG
+CTGCAGAACAAAACGCTCTGGTCGTCATACACTGAAATCATCGATGTGAAACAGTGCTACCCGAACACGGCACTGGTCGG
+CGTGCAGGTGGACTCGGAGCAGTTCGGCAGCCAGCAGGTGAGCCGTAATTATCATCTGCGCGGGCGTATTCTGCAGGTGC
+CGTCGAACTATAACCCGCAGACGCGGCAATACAGCGGTATCTGGGACGGAACGTTTAAACCGGCATACAGCAACAACATG
+GCCTGGTGTCTGTGGGATATGCTGACCCATCCGCGCTACGGCATGGGGAAACGTCTTGGTGCGGCGGATGTGGATAAATG
+GGCGCTGTATGTCATCGGCCAGTACTGCGACCAGTCAGTGCCGGACGGCTTTGGCGGCACGGAGCCGCGCATCACCTGTA
+ATGCGTACCTGACCACACAGCGTAAGGCGTGGGATGTGCTCAGCGATTTCTGCTCGGCGATGCGCTGTATGCCGGTATGG
+AACGGGCAGACGCTGACGTTCGTGCAGGACCGACCGTCGGATAAGACGTGGACCTATAACCGCAGTAATGTGGTGATGCC
+GGATGATGGCGCGCCGTTCCGCTACAGCTTCAGCGCCCTGAAGGACCGCCATAATGCCGTTGAGGTGAACTGGATTGACC
+CGAACAACGGCTGGGAGACGGCGACAGAGCTTGTTGAAGATACGCAGGCCATTGCCCGTTACGGTCGTAATGTTACGAAG
+ATGGATGCCTTTGGCTGTACCAGCCGGGGGCAGGCACACCGCGCCGGGCTGTGGCTGATTAAAACAGAACTGCTGGAAAC
+GCAGACCGTGGATTTCAGCGTCGGCGCAGAAGGGCTTCGCCATGTACCGGGCGATGTTATTGAAATCTGCGATGATGACT
+ATGCCGGTATCAGCACCGGTGGTCGTGTGCTGGCGGTGAACAGCCAGACCCGGACGCTGACGCTCGACCGTGAAATCACG
+CTGCCATCCTCCGGTACCGCGCTGATAAGCCTGGTTGACGGAAGTGGCAATCCGGTCAGCGTGGAGGTTCAGTCCGTCAC
+CGACGGCGTGAAGGTAAAAGTGAGCCGTGTTCCTGACGGTGTTGCTGAATACAGCGTATGGGAGCTGAAGCTGCCGACGC
+TGCGCCAGCGACTGTTCCGCTGCGTGAGTATCCGTGAGAACGACGACGGCACGTATGCCATCACCGCCGTGCAGCATGTG
+CCGGAAAAAGAGGCCATCGTGGATAACGGGGCGCACTTTGACGGCGAACAGAGTGGCACGGTGAATGGTGTCACGCCGCC
+AGCGGTGCAGCACCTGACCGCAGAAGTCACTGCAGACAGCGGGGAATATCAGGTGCTGGCGCGATGGGACACACCGAAGG
+TGGTGAAGGGCGTGAGTTTCCTGCTCCGTCTGACCGTAACAGCGGACGACGGCAGTGAGCGGCTGGTCAGCACGGCCCGG
+ACGACGGAAACCACATACCGCTTCACGCAACTGGCGCTGGGGAACTACAGGCTGACAGTCCGGGCGGTAAATGCGTGGGG
+GCAGCAGGGCGATCCGGCGTCGGTATCGTTCCGGATTGCCGCACCGGCAGCACCGTCGAGGATTGAGCTGACGCCGGGCT
+ATTTTCAGATAACCGCCACGCCGCATCTTGCCGTTTATGACCCGACGGTACAGTTTGAGTTCTGGTTCTCGGAAAAGCAG
+ATTGCGGATATCAGACAGGTTGAAACCAGCACGCGTTATCTTGGTACGGCGCTGTACTGGATAGCCGCCAGTATCAATAT
+CAAACCGGGCCATGATTATTACTTTTATATCCGCAGTGTGAACACCGTTGGCAAATCGGCATTCGTGGAGGCCGTCGGTC
+GGGCGAGCGATGATGCGGAAGGTTACCTGGATTTTTTCAAAGGCAAGATAACCGAATCCCATCTCGGCAAGGAGCTGCTG
+GAAAAAGTCGAGCTGACGGAGGATAACGCCAGCAGACTGGAGGAGTTTTCGAAAGAGTGGAAGGATGCCAGTGATAAGTG
+GAATGCCATGTGGGCTGTCAAAATTGAGCAGACCAAAGACGGCAAACATTATGTCGCGGGTATTGGCCTCAGCATGGAGG
+ACACGGAGGAAGGCAAACTGAGCCAGTTTCTGGTTGCCGCCAATCGTATCGCATTTATTGACCCGGCAAACGGGAATGAA
+ACGCCGATGTTTGTGGCGCAGGGCAACCAGATATTCATGAACGACGTGTTCCTGAAGCGCCTGACGGCCCCCACCATTAC
+CAGCGGCGGCAATCCTCCGGCCTTTTCCCTGACACCGGACGGAAAGCTGACCGCTAAAAATGCGGATATCAGTGGCAGTG
+TGAATGCGAACTCCGGGACGCTCAGTAATGTGACGATAGCTGAAAACTGTACGATAAACGGTACGCTGAGGGCGGAAAAA
+ATCGTCGGGGACATTGTAAAGGCGGCGAGCGCGGCTTTTCCGCGCCAGCGTGAAAGCAGTGTGGACTGGCCGTCAGGTAC
+CCGTACTGTCACCGTGACCGATGACCATCCTTTTGATCGCCAGATAGTGGTGCTTCCGCTGACGTTTCGCGGAAGTAAGC
+GTACTGTCAGCGGCAGGACAACGTATTCGATGTGTTATCTGAAAGTACTGATGAACGGTGCGGTGATTTATGATGGCGCG
+GCGAACGAGGCGGTACAGGTGTTCTCCCGTATTGTTGACATGCCAGCGGGTCGGGGAAACGTGATCCTGACGTTCACGCT
+TACGTCCACACGGCATTCGGCAGATATTCCGCCGTATACGTTTGCCAGCGATGTGCAGGTTATGGTGATTAAGAAACAGG
+CGCTGGGCATCAGCGTGGTCTGAGTGTGTTACAGAGGTTCGTCCGGGAACGGGCGTTTTATTATAAAACAGTGAGAGGTG
+AACGATGCGTAATGTGTGTATTGCCGTTGCTGTCTTTGCCGCACTTGCGGTGACAGTCACTCCGGCCCGTGCGGAAGGTG
+GACATGGTACGTTTACGGTGGGCTATTTTCAAGTGAAACCGGGTACATTGCCGTCGTTGTCGGGCGGGGATACCGGTGTG
+AGTCATCTGAAAGGGATTAACGTGAAGTACCGTTATGAGCTGACGGACAGTGTGGGGGTGATGGCTTCCCTGGGGTTCGC
+CGCGTCGAAAAAGAGCAGCACAGTGATGACCGGGGAGGATACGTTTCACTATGAGAGCCTGCGTGGACGTTATGTGAGCG
+TGATGGCCGGACCGGTTTTACAAATCAGTAAGCAGGTCAGTGCGTACGCCATGGCCGGAGTGGCTCACAGTCGGTGGTCC
+GGCAGTACAATGGATTACCGTAAGACGGAAATCACTCCCGGGTATATGAAAGAGACGACCACTGCCAGGGACGAAAGTGC
+AATGCGGCATACCTCAGTGGCGTGGAGTGCAGGTATACAGATTAATCCGGCAGCGTCCGTCGTTGTTGATATTGCTTATG
+AAGGCTCCGGCAGTGGCGACTGGCGTACTGACGGATTCATCGTTGGGGTCGGTTATAAATTCTGATTAGCCAGGTAACAC
+AGTGTTATGACAGCCCGCCGGAACCGGTGGGCTTTTTTGTGGGGTGAATATGGCAGTAAAGATTTCAGGAGTCCTGAAAG
+ACGGCACAGGAAAACCGGTACAGAACTGCACCATTCAGCTGAAAGCCAGACGTAACAGCACCACGGTGGTGGTGAACACG
+GTGGGCTCAGAGAATCCGGATGAAGCCGGGCGTTACAGCATGGATGTGGAGTACGGTCAGTACAGTGTCATCCTGCAGGT
+TGACGGTTTTCCACCATCGCACGCCGGGACCATCACCGTGTATGAAGATTCACAACCGGGGACGCTGAATGATTTTCTCT
+GTGCCATGACGGAGGATGATGCCCGGCCGGAGGTGCTGCGTCGTCTTGAACTGATGGTGGAAGAGGTGGCGCGTAACGCG
+TCCGTGGTGGCACAGAGTACGGCAGACGCGAAGAAATCAGCCGGCGATGCCAGTGCATCAGCTGCTCAGGTCGCGGCCCT
+TGTGACTGATGCAACTGACTCAGCACGCGCCGCCAGCACGTCCGCCGGACAGGCTGCATCGTCAGCTCAGGAAGCGTCCT
+CCGGCGCAGAAGCGGCATCAGCAAAGGCCACTGAAGCGGAAAAAAGTGCCGCAGCCGCAGAGTCCTCAAAAAACGCGGCG
+GCCACCAGTGCCGGTGCGGCGAAAACGTCAGAAACGAATGCTGCAGCGTCACAACAATCAGCCGCCACGTCTGCCTCCAC
+CGCGGCCACGAAAGCGTCAGAGGCCGCCACTTCAGCACGAGATGCGGTGGCCTCAAAAGAGGCAGCAAAATCATCAGAAA
+CGAACGCATCATCAAGTGCCGGTCGTGCAGCTTCCTCGGCAACGGCGGCAGAAAATTCTGCCAGGGCGGCAAAAACGTCC
+GAGACGAATGCCAGGTCATCTGAAACAGCAGCGGAACGGAGCGCCTCTGCCGCGGCAGACGCAAAAACAGCGGCGGCGGG
+GAGTGCGTCAACGGCATCCACGAAGGCGACAGAGGCTGCGGGAAGTGCGGTATCAGCATCGCAGAGCAAAAGTGCGGCAG
+AAGCGGCGGCAATACGTGCAAAAAATTCGGCAAAACGTGCAGAAGATATAGCTTCAGCTGTCGCGCTTGAGGATGCGGAC
+ACAACGAGAAAGGGGATAGTGCAGCTCAGCAGTGCAACCAACAGCACGTCTGAAACGCTTGCTGCAACGCCAAAGGCGGT
+TAAGGTGGTAATGGATGAAACGAACAGAAAAGCCCACTGGACAGTCCGGCACTGACCGGAACGCCAACAGCACCAACCGC
+GCTCAGGGGAACAAACAATACCCAGATTGCGAACACCGCTTTTGTACTGGCCGCGATTGCAGATGTTATCGACGCGTCAC
+CTGACGCACTGAATACGCTGAATGAACTGGCCGCAGCGCTCGGGAATGATCCAGATTTTGCTACCACCATGACTAACGCG
+CTTGCGGGTAAACAACCGAAGAATGCGACACTGACGGCGCTGGCAGGGCTTTCCACGGCGAAAAATAAATTACCGTATTT
+TGCGGAAAATGATGCCGCCAGCCTGACTGAACTGACTCAGGTTGGCAGGGATATTCTGGCAAAAAATTCCGTTGCAGATG
+TTCTTGAATACCTTGGGGCCGGTGAGAATTCGGCCTTTCCGGCAGGTGCGCCGATCCCGTGGCCATCAGATATCGTTCCG
+TCTGGCTACGTCCTGATGCAGGGGCAGGCGTTTGACAAATCAGCCTACCCAAAACTTGCTGTCGCGTATCCATCGGGTGT
+GCTTCCTGATATGCGAGGCTGGACAATCAAGGGGAAACCCGCCAGCGGTCGTGCTGTATTGTCTCAGGAACAGGATGGAA
+TTAAGTCGCACACCCACAGTGCCAGTGCATCCGGTACGGATTTGGGGACGAAAACCACATCGTCGTTTGATTACGGGACG
+AAAACAACAGGCAGTTTCGATTACGGCACCAAATCGACGAATAACACGGGGGCTCATGCTCACAGTCTGAGCGGTTCAAC
+AGGGGCCGCGGGTGCTCATGCCCACACAAGTGGTTTAAGGATGAACAGTTCTGGCTGGAGTCAGTATGGAACAGCAACCA
+TTACAGGAAGTTTATCCACAGTTAAAGGAACCAGCACACAGGGTATTGCTTATTTATCGAAAACGGACAGTCAGGGCAGC
+CACAGTCACTCATTGTCCGGTACAGCCGTGAGTGCCGGTGCACATGCGCATACAGTTGGTATTGGTGCGCACCAGCATCC
+GGTTGTTATCGGTGCTCATGCCCATTCTTTCAGTATTGGTTCACACGGACACACCATCACCGTTAACGCTGCGGGTAACG
+CGGAAAACACCGTCAAAAACATTGCATTTAACTATATTGTGAGGCTTGCATAATGGCATTCAGAATGAGTGAACAACCAC
+GGACCATAAAAATTTATAATCTGCTGGCCGGAACTAATGAATTTATTGGTGAAGGTGACGCATATATTCCGCCTCATACC
+GGTCTGCCTGCAAACAGTACCGATATTGCACCGCCAGATATTCCGGCTGGCTTTGTGGCTGTTTTCAACAGTGATGAGGC
+ATCGTGGCATCTCGTTGAAGACCATCGGGGTAAAACCGTCTATGACGTGGCTTCCGGCGACGCGTTATTTATTTCTGAAC
+TCGGTCCGTTACCGGAAAATTTTACCTGGTTATCGCCGGGAGGGGAATATCAGAAGTGGAACGGCACAGCCTGGGTGAAG
+GATACGGAAGCAGAAAAACTGTTCCGGATCCGGGAGGCGGAAGAAACAAAAAAAAGCCTGATGCAGGTAGCCAGTGAGCA
+TATTGCGCCGCTTCAGGATGCTGCAGATCTGGAAATTGCAACGAAGGAAGAAACCTCGTTGCTGGAAGCCTGGAAGAAGT
+ATCGGGTGTTGCTGAACCGTGTTGATACATCAACTGCACCTGATATTGAGTGGCCTGCTGTCCCTGTTATGGAGTAATCG
+TTTTGTGATATGCCGCAGAAACGTTGTATGAAATAACGTTCTGCGGTTAGTTAGTATATTGTAAAGCTGAGTATTGGTTT
+ATTTGGCGATTATTATCTTCAGGAGAATAATGGAAGTTCTATGACTCAATTGTTCATAGTGTTTACATCACCGCCAATTG
+CTTTTAAGACTGAACGCATGAAATATGGTTTTTCGTCATGTTTTGAGTCTGCTGTTGATATTTCTAAAGTCGGTTTTTTT
+TCTTCGTTTTCTCTAACTATTTTCCATGAAATACATTTTTGATTATTATTTGAATCAATTCCAATTACCTGAAGTCTTTC
+ATCTATAATTGGCATTGTATGTATTGGTTTATTGGAGTAGATGCTTGCTTTTCTGAGCCATAGCTCTGATATCCAAATGA
+AGCCATAGGCATTTGTTATTTTGGCTCTGTCAGCTGCATAACGCCAAAAAATATATTTATCTGCTTGATCTTCAAATGTT
+GTATTGATTAAATCAATTGGATGGAATTGTTTATCATAAAAAATTAATGTTTGAATGTGATAACCGTCCTTTAAAAAAGT
+CGTTTCTGCAAGCTTGGCTGTATAGTCAACTAACTCTTCTGTCGAAGTGATATTTTTAGGCTTATCTACCAGTTTTAGAC
+GCTCTTTAATATCTTCAGGAATTATTTTATTGTCATATTGTATCATGCTAAATGACAATTTGCTTATGGAGTAATCTTTT
+AATTTTAAATAAGTTATTCTCCTGGCTTCATCAAATAAAGAGTCGAATGATGTTGGCGAAATCACATCGTCACCCATTGG
+ATTGTTTATTTGTATGCCAAGAGAGTTACAGCAGTTATACATTCTGCCATAGATTATAGCTAAGGCATGTAATAATTCGT
+AATCTTTTAGCGTATTAGCGACCCATCGTCTTTCTGATTTAATAATAGATGATTCAGTTAAATATGAAGGTAATTTCTTT
+TGTGCAAGTCTGACTAACTTTTTTATACCAATGTTTAACATACTTTCATTTGTAATAAACTCAATGTCATTTTCTTCAAT
+GTAAGATGAAATAAGAGTAGCCTTTGCCTCGCTATACATTTCTAAATCGCCTTGTTTTTCTATCGTATTGCGAGAATTTT
+TAGCCCAAGCCATTAATGGATCATTTTTCCATTTTTCAATAACATTATTGTTATACCAAATGTCATATCCTATAATCTGG
+TTTTTGTTTTTTTGAATAATAAATGTTACTGTTCTTGCGGTTTGGAGGAATTGATTCAAATTCAAGCGAAATAATTCAGG
+GTCAAAATATGTATCAATGCAGCATTTGAGCAAGTGCGATAAATCTTTAAGTCTTCTTTCCCATGGTTTTTTAGTCATAA
+AACTCTCCATTTTGATAGGTTGCATGCTAGATGCTGATATATTTTAGAGGTGATAAAATTAACTGCTTAACTGTCAATGT
+AATACAAGTTGTTTGATCTTTGCAATGATTCTTATCAGAAACCATATAGTAAATTAGTTACACAGGAAATTTTTAATATT
+ATTATTATCATTCATTATGTATTAAAATTAGAGTTGTGGCTTGGCTCTGCTAACACGTTGCTCATAGGAGATATGGTAGA
+GCCGCAGACACGTCGTATGCAGGAACGTGCTGCGGCTGGCTGGTGAACTTCCGATAGTGCGGGTGTTGAATGATTTCCAG
+TTGCTACCGATTTTACATATTTTTTGCATGAGAGAATTTGTACCACCTCCCACCGACCATCTATGACTGTACGCCACTGT
+CCCTAGGACTGCTATGTGCCGGAGCGGACATTACAAACGTCCTTCTCGGTGCATGCCACTGTTGCCAATGACCTGCCTAG
+GAATTGGTTAGCAAGTTACTACCGGATTTTGTAAAAACAGCCCTCCTCATATAAAAAGTATTCGTTCACTTCCGATAAGC
+GTCGTAATTTTCTATCTTTCATCATATTCTAGATCCCTCTGAAAAAATCTTCCGAGTTTGCTAGGCACTGATACATAACT
+CTTTTCCAATAATTGGGGAAGTCATTCAAATCTATAATAGGTTTCAGATTTGCTTCAATAAATTCTGACTGTAGCTGCTG
+AAACGTTGCGGTTGAACTATATTTCCTTATAACTTTTACGAAAGAGTTTCTTTGAGTAATCACTTCACTCAAGTGCTTCC
+CTGCCTCCAAACGATACCTGTTAGCAATATTTAATAGCTTGAAATGATGAAGAGCTCTGTGTTTGTCTTCCTGCCTCCAG
+TTCGCCGGGCATTCAACATAAAAACTGATAGCACCCGGAGTTCCGGAAACGAAATTTGCATATACCCATTGCTCACGAAA
+AAAAATGTCCTTGTCGATATAGGGATGAATCGCTTGGTGTACCTCATCTACTGCGAAAACTTGACCTTTCTCTCCCATAT
+TGCAGTCGCGGCACGATGGAACTAAATTAATAGGCATCACCGAAAATTCAGGATAATGTGCAATAGGAAGAAAATGATCT
+ATATTTTTTGTCTGTCCTATATCACCACAAAATGGACATTTTTCACCTGATGAAACAAGCATGTCATCGTAATATGTTCT
+AGCGGGTTTGTTTTTATCTCGGAGATTATTTTCATAAAGCTTTTCTAATTTAACCTTTGTCAGGTTACCAACTACTAAGG
+TTGTAGGCTCAAGAGGGTGTGTCCTGTCGTAGGTAAATAACTGACCTGTCGAGCTTAATATTCTATATTGTTGTTCTTTC
+TGCAAAAAAGTGGGGAAGTGAGTAATGAAATTATTTCTAACATTTATCTGCATCATACCTTCCGAGCATTTATTAAGCAT
+TTCGCTATAAGTTCTCGCTGGAAGAGGTAGTTTTTTCATTGTACTTTACCTTCATCTCTGTTCATTATCATCGCTTTTAA
+AACGGTTCGACCTTCTAATCCTATCTGACCATTATAATTTTTTAGAATGGTTTCATAAGAAAGCTCTGAATCAACGGACT
+GCGATAATAAGTGGTGGTATCCAGAATTTGTCACTTCAAGTAAAAACACCTCACGAGTTAAAACACCTAAGTTCTCACCG
+AATGTCTCAATATCCGGACGGATAATATTTATTGCTTCTCTTGACCGTAGGACTTTCCACATGCAGGATTTTGGAACCTC
+TTGCAGTACTACTGGGGAATGAGTTGCAATTATTGCTACACCATTGCGTGCATCGAGTAAGTCGCTTAATGTTCGTAAAA
+AAGCAGAGAGCAAAGGTGGATGCAGATGAACCTCTGGTTCATCGAATAAAACTAATGACTTTTCGCCAACGACATCTACT
+AATCTTGTGATAGTAAATAAAACAATTGCATGTCCAGAGCTCATTCGAAGCAGATATTTCTGGATATTGTCATAAAACAA
+TTTAGTGAATTTATCATCGTCCACTTGAATCTGTGGTTCATTACGTCTTAACTCTTCATATTTAGAAATGAGGCTGATGA
+GTTCCATATTTGAAAAGTTTTCATCACTACTTAGTTTTTTGATAGCTTCAAGCCAGAGTTGTCTTTTTCTATCTACTCTC
+ATACAACCAATAAATGCTGAAATGAATTCTAAGCGGAGATCGCCTAGTGATTTTAAACTATTGCTGGCAGCATTCTTGAG
+TCCAATATAAAAGTATTGTGTACCTTTTGCTGGGTCAGGTTGTTCTTTAGGAGGAGTAAAAGGATCAAATGCACTAAACG
+AAACTGAAACAAGCGATCGAAAATATCCCTTTGGGATTCTTGACTCGATAAGTCTATTATTTTCAGAGAAAAAATATTCA
+TTGTTTTCTGGGTTGGTGATTGCACCAATCATTCCATTCAAAATTGTTGTTTTACCACACCCATTCCGCCCGATAAAAGC
+ATGAATGTTCGTGCTGGGCATAGAATTAACCGTCACCTCAAAAGGTATAGTTAAATCACTGAATCCGGGAGCACTTTTTC
+TATTAAATGAAAAGTGGAAATCTGACAATTCTGGCAAACCATTTAACACACGTGCGAACTGTCCATGAATTTCTGAAAGA
+GTTACCCCTCTAAGTAATGAGGTGTTAAGGACGCTTTCATTTTCAATGTCGGCTAATCGATTTGGCCATACTACTAAATC
+CTGAATAGCTTTAAGAAGGTTATGTTTAAAACCATCGCTTAATTTGCTGAGATTAACATAGTAGTCAATGCTTTCACCTA
+AGGAAAAAAACATTTCAGGGAGTTGACTGAATTTTTTATCTATTAATGAATAAGTGCTTACTTCTTCTTTTTGACCTACA
+AAACCAATTTTAACATTTCCGATATCGCATTTTTCACCATGCTCATCAAAGACAGTAAGATAAAACATTGTAACAAAGGA
+ATAGTCATTCCAACCATCTGCTCGTAGGAATGCCTTATTTTTTTCTACTGCAGGAATATACCCGCCTCTTTCAATAACAC
+TAAACTCCAACATATAGTAACCCTTAATTTTATTAAAATAACCGCAATTTATTTGGCGGCAACACAGGATCTCTCTTTTA
+AGTTACTCTCTATTACATACGTTTTCCATCTAAAAATTAGTAGTATTGAACTTAACGGGGCATCGTATTGTAGTTTTCCA
+TATTTAGCTTTCTGCTTCCTTTTGGATAACCCACTGTTATTCATGTTGCATGGTGCACTGTTTATACCAACGATATAGTC
+TATTAATGCATATATAGTATCGCCGAACGATTAGCTCTTCAGGCTTCTGAAGAAGCGTTTCAAGTACTAATAAGCCGATA
+GATAGCCACGGACTTCGTAGCCATTTTTCATAAGTGTTAACTTCCGCTCCTCGCTCATAACAGACATTCACTACAGTTAT
+GGCGGAAAGGTATGCATGCTGGGTGTGGGGAAGTCGTGAAAGAAAAGAAGTCAGCTGCGTCGTTTGACATCACTGCTATC
+TTCTTACTGGTTATGCAGGTCGTAGTGGGTGGCACACAAAGCTTTGCACTGGATTGCGAGGCTTTGTGCTTCTCTGGAGT
+GCGACAGGTTTGATGACAAAAAATTAGCGCAAGAAGACAAAAATCACCTTGCGCTAATGCTCTGTTACAGGTCACTAATA
+CCATCTAAGTAGTTGATTCATAGTGACTGCATATGTTGTGTTTTACAGTATTATGTAGTCTGTTTTTTATGCAAAATCTA
+ATTTAATATATTGATATTTATATCATTTTACGTTTCTCGTTCAGCTTTTTTATACTAAGTTGGCATTATAAAAAAGCATT
+GCTTATCAATTTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTTGATTTCAATTTTGTCCCACTCCC
+TGCCTCTGTCATCACGATACTGTGATGCCATGGTGTCCGACTTATGCCCGAGAAGATGTTGAGCAAACTTATCGCTTATC
+TGCTTCTCATAGAGTCTTGCAGACAAACTGCGCAACTCGTGAAAGGTAGGCGGATCCCCTTCGAAGGAAAGACCTGATGC
+TTTTCGTGCGCGCATAAAATACCTTGATACTGTGCCGGATGAAAGCGGTTCGCGACGAGTAGATGCAATTATGGTTTCTC
+CGCCAAGAATCTCTTTGCATTTATCAAGTGTTTCCTTCATTGATATTCCGAGAGCATCAATATGCAATGCTGTTGGGATG
+GCAATTTTTACGCCTGTTTTGCTTTGCTCGACATAAAGATATCCATCTACGATATCAGACCACTTCATTTCGCATAAATC
+ACCAACTCGTTGCCCGGTAACAACAGCCAGTTCCATTGCAAGTCTGAGCCAACATGGTGATGATTCTGCTGCTTGATAAA
+TTTTCAGGTATTCGTCAGCCGTAAGTCTTGATCTCCTTACCTCTGATTTTGCTGCGCGAGTGGCAGCGACATGGTTTGTT
+GTTATATGGCCTTCAGCTATTGCCTCTCGGAATGCATCGCTCAGTGTTGATCTGATTAACTTGGCTGACGCCGCCTTGCC
+CTCGTCTATGTATCCATTGAGCATTGCCGCAATTTCTTTTGTGGTGATGTCTTCAAGTGGAGCATCAGGCAGACCCCTCC
+TTATTGCTTTAATTTTGCTCATGTAATTTATGAGTGTCTTCTGCTTGATTCCTCTGCTGGCCAGGATTTTTTCGTAGCGA
+TCAAGCCATGAATGTAACGTAACGGAATTATCACTGTTGATTCTCGCTGTCAGAGGCTTGTGTTTGTGTCCTGAAAATAA
+CTCAATGTTGGCCTGTATAGCTTCAGTGATTGCGATTCGCCTGTCTCTGCCTAATCCAAACTCTTTACCCGTCCTTGGGT
+CCCTGTAGCAGTAATATCCATTGTTTCTTATATAAAGGTTAGGGGGTAAATCCCGGCGCTCATGACTTCGCCTTCTTCCC
+ATTTCTGATCCTCTTCAAAAGGCCACCTGTTACTGGTCGATTTAAGTCAACCTTTACCGCTGATTCGTGGAACAGATACT
+CTCTTCCATCCTTAACCGGAGGTGGGAATATCCTGCATTCCCGAACCCATCGACGAACTGTTTCAAGGCTTCTTGGACGT
+CGCTGGCGTGCGTTCCACTCCTGAAGTGTCAAGTACATCGCAAAGTCTCCGCAATTACACGCAAGAAAAAACCGCCATCA
+GGCGGCTTGGTGTTCTTTCAGTTCTTCAATTCGAATATTGGTTACGTCTGCATGTGCTATCTGCGCCCATATCATCCAGT
+GGTCGTAGCAGTCGTTGATGTTCTCCGCTTCGATAACTCTGTTGAATGGCTCTCCATTCCATTCTCCTGTGACTCGGAAG
+TGCATTTATCATCTCCATAAAACAAAACCCGCCGTAGCGAGTTCAGATAAAATAAATCCCCGCGAGTGCGAGGATTGTTA
+TGTAATATTGGGTTTAATCATCTATATGTTTTGTACAGAGAGGGCAAGTATCGTTTCCACCGTACTCGTGATAATAATTT
+TGCACGGTATCAGTCATTTCTCGCACATTGCAGAATGGGGATTTGTCTTCATTAGACTTATAAACCTTCATGGAATATTT
+GTATGCCGACTCTATATCTATACCTTCATCTACATAAACACCTTCGTGATGTCTGCATGGAGACAAGACACCGGATCTGC
+ACAACATTGATAACGCCCAATCTTTTTGCTCAGACTCTAACTCATTGATACTCATTTATAAACTCCTTGCAATGTATGTC
+GTTTCAGCTAAACGGTATCAGCAATGTTTATGTAAAGAAACAGTAAGATAATACTCAACCCGATGTTTGAGTACGGTCAT
+CATCTGACACTACAGACTCTGGCATCGCTGTGAAGACGACGCGAAATTCAGCATTTTCACAAGCGTTATCTTTTACAAAA
+CCGATCTCACTCTCCTTTGATGCGAATGCCAGCGTCAGACATCATATGCAGATACTCACCTGCATCCTGAACCCATTGAC
+CTCCAACCCCGTAATAGCGATGCGTAATGATGTCGATAGTTACTAACGGGTCTTGTTCGATTAACTGCCGCAGAAACTCT
+TCCAGGTCACCAGTGCAGTGCTTGATAACAGGAGTCTTCCCAGGATGGCGAACAACAAGAAACTGGTTTCCGTCTTCACG
+GACTTCGTTGCTTTCCAGTTTAGCAATACGCTTACTCCCATCCGAGATAACACCTTCGTAATACTCACGCTGCTCGTTGA
+GTTTTGATTTTGCTGTTTCAAGCTCAACACGCAGTTTCCCTACTGTTAGCGCAATATCCTCGTTCTCCTGGTCGCGGCGT
+TTGATGTATTGCTGGTTTCTTTCCCGTTCATCCAGCAGTTCCAGCACAATCGATGGTGTTACCAATTCATGGAAAAGGTC
+TGCGTCAAATCCCCAGTCGTCATGCATTGCCTGCTCTGCCGCTTCACGCAGTGCCTGAGAGTTAATTTCGCTCACTTCGA
+ACCTCTCTGTTTACTGATAAGTTCCAGATCCTCCTGGCAACTTGCACAAGTCCGACAACCCTGAACGACCAGGCGTCTTC
+GTTCATCTATCGGATCGCCACACTCACAACAATGAGTGGCAGATATAGCCTGGTGGTTCAGGCGGCGCATTTTTATTGCT
+GTGTTGCGCTGTAATTCTTCTATTTCTGATGCTGAATCAATGATGTCTGCCATCTTTCATTAATCCCTGAACTGTTGGTT
+AATACGCTTGAGGGTGAATGCGAATAATAAAAAAGGAGCCTGTAGCTCCCTGATGATTTTGCTTTTCATGTTCATCGTTC
+CTTAAAGACGCCGTTTAACATGCCGATTGCCAGGCTTAAATGAGTCGGTGTGAATCCCATCAGCGTTACCGTTTCGCGGT
+GCTTCTTCAGTACGCTACGGCAAATGTCATCGACGTTTTTATCCGGAAACTGCTGTCTGGCTTTTTTTGATTTCAGAATT
+AGCCTGACGGGCAATGCTGCGAAGGGCGTTTTCCTGCTGAGGTGTCATTGAACAAGTCCCATGTCGGCAAGCATAAGCAC
+ACAGAATATGAAGCCCGCTGCCAGAAAAATGCATTCCGTGGTTGTCATACCTGGTTTCTCTCATCTGCTTCTGCTTTCGC
+CACCATCATTTCCAGCTTTTGTGAAAGGGATGCGGCTAACGTATGAAATTCTTCGTCTGTTTCTACTGGTATTGGCACAA
+ACCTGATTCCAATTTGAGCAAGGCTATGTGCCATCTCGATACTCGTTCTTAACTCAACAGAAGATGCTTTGTGCATACAG
+CCCCTCGTTTATTATTTATCTCCTCAGCCAGCCGCTGTGCTTTCAGTGGATTTCGGATAACAGAAAGGCCGGGAAATACC
+CAGCCTCGCTTTGTAACGGAGTAGACGAAAGTGATTGCGCCTACCCGGATATTATCGTGAGGATGCGTCATCGCCATTGC
+TCCCCAAATACAAAACCAATTTCAGCCAGTGCCTCGTCCATTTTTTCGATGAACTCCGGCACGATCTCGTCAAAACTCGC
+CATGTACTTTTCATCCCGCTCAATCACGACATAATGCAGGCCTTCACGCTTCATACGCGGGTCATAGTTGGCAAAGTACC
+AGGCATTTTTTCGCGTCACCCACATGCTGTACTGCACCTGGGCCATGTAAGCTGACTTTATGGCCTCGAAACCACCGAGC
+CGGAACTTCATGAAATCCCGGGAGGTAAACGGGCATTTCAGTTCAAGGCCGTTGCCGTCACTGCATAAACCATCGGGAGA
+GCAGGCGGTACGCATACTTTCGTCGCGATAGATGATCGGGGATTCAGTAACATTCACGCCGGAAGTGAATTCAAACAGGG
+TTCTGGCGTCGTTCTCGTACTGTTTTCCCCAGGCCAGTGCTTTAGCGTTAACTTCCGGAGCCACACCGGTGCAAACCTCA
+GCAAGCAGGGTGTGGAAGTAGGACATTTTCATGTCAGGCCACTTCTTTCCGGAGCGGGGTTTTGCTATCACGTTGTGAAC
+TTCTGAAGCGGTGATGACGCCGAGCCGTAATTTGTGCCACGCATCATCCCCCTGTTCGACAGCTCTCACATCGATCCCGG
+TACGCTGCAGGATAATGTCCGGTGTCATGCTGCCACCTTCTGCTCTGCGGCTTTCTGTTTCAGGAATCCAAGAGCTTTTA
+CTGCTTCGGCCTGTGTCAGTTCTGACGATGCACGAATGTCGCGGCGAAATATCTGGGAACAGAGCGGCAATAAGTCGTCA
+TCCCATGTTTTATCCAGGGCGATCAGCAGAGTGTTAATCTCCTGCATGGTTTCATCGTTAACCGGAGTGATGTCGCGTTC
+CGGCTGACGTTCTGCAGTGTATGCAGTATTTTCGACAATGCGCTCGGCTTCATCCTTGTCATAGATACCAGCAAATCCGA
+AGGCCAGACGGGCACACTGAATCATGGCTTTATGACGTAACATCCGTTTGGGATGCGACTGCCACGGCCCCGTGATTTCT
+CTGCCTTCGCGAGTTTTGAATGGTTCGCGGCGGCATTCATCCATCCATTCGGTAACGCAGATCGGATGATTACGGTCCTT
+GCGGTAAATCCGGCATGTACAGGATTCATTGTCCTGCTCAAAGTCCATGCCATCAAACTGCTGGTTTTCATTGATGATGC
+GGGACCAGCCATCAACGCCCACCACCGGAACGATGCCATTCTGCTTATCAGGAAAGGCGTAAATTTCTTTCGTCCACGGA
+TTAAGGCCGTACTGGTTGGCAACGATCAGTAATGCGATGAACTGCGCATCGCTGGCATCACCTTTAAATGCCGTCTGGCG
+AAGAGTGGTGATCAGTTCCTGTGGGTCGACAGAATCCATGCCGACACGTTCAGCCAGCTTCCCAGCCAGCGTTGCGAGTG
+CAGTACTCATTCGTTTTATACCTCTGAATCAATATCAACCTGGTGGTGAGCAATGGTTTCAACCATGTACCGGATGTGTT
+CTGCCATGCGCTCCTGAAACTCAACATCGTCATCAAACGCACGGGTAATGGATTTTTTGCTGGCCCCGTGGCGTTGCAAA
+TGATCGATGCATAGCGATTCAAACAGGTGCTGGGGCAGGCCTTTTTCCATGTCGTCTGCCAGTTCTGCCTCTTTCTCTTC
+ACGGGCGAGCTGCTGGTAGTGACGCGCCCAGCTCTGAGCCTCAAGACGATCCTGAATGTAATAAGCGTTCATGGCTGAAC
+TCCTGAAATAGCTGTGAAAATATCGCCCGCGAAATGCCGGGCTGATTAGGAAAACAGGAAAGGGGGTTAGTGAATGCTTT
+TGCTTGATCTCAGTTTCAGTATTAATATCCATTTTTTATAAGCGTCGACGGCTTCACGAAACATCTTTTCATCGCCAATA
+AAAGTGGCGATAGTGAATTTAGTCTGGATAGCCATAAGTGTTTGATCCATTCTTTGGGACTCCTGGCTGATTAAGTATGT
+CGATAAGGCGTTTCCATCCGTCACGTAATTTACGGGTGATTCGTTCAAGTAAAGATTCGGAAGGGCAGCCAGCAACAGGC
+CACCCTGCAATGGCATATTGCATGGTGTGCTCCTTATTTATACATAACGAAAAACGCCTCGAGTGAAGCGTTATTGGTAT
+GCGGTAAAACCGCACTCAGGCGGCCTTGATAGTCATATCATCTGAATCAAATATTCCTGATGTATCGATATCGGTAATTC
+TTATTCCTTCGCTACCATCCATTGGAGGCCATCCTTCCTGACCATTTCCATCATTCCAGTCGAACTCACACACAACACCA
+TATGCATTTAAGTCGCTTGAAATTGCTATAAGCAGAGCATGTTGCGCCAGCATGATTAATACAGCATTTAATACAGAGCC
+GTGTTTATTGAGTCGGTATTCAGAGTCTGACCAGAAATTATTAATCTGGTGAAGTTTTTCCTCTGTCATTACGTCATGGT
+CGATTTCAATTTCTATTGATGCTTTCCAGTCGTAATCAATGATGTATTTTTTGATGTTTGACATCTGTTCATATCCTCAC
+AGATAAAAAATCGCCCTCACACTGGAGGGCAAAGAAGATTTCCAATAATCAGAACAAGTCGGCTCCTGTTTAGTTACGAG
+CGACATTGCTCCGTGTATTCACTCGTTGGAATGAATACACAGTGCAGTGTTTATTCTGTTATTTATGCCAAAAATAAAGG
+CCACTATCAGGCAGCTTTGTTGTTCTGTTTACCAAGTTCTCTGGCAATCATTGCCGTCGTTCGTATTGCCCATTTATCGA
+CATATTTCCCATCTTCCATTACAGGAAACATTTCTTCAGGCTTAACCATGCATTCCGATTGCAGCTTGCATCCATTGCAT
+CGCTTGAATTGTCCACACCATTGATTTTTATCAATAGTCGTAGTCATACGGATAGTCCTGGTATTGTTCCATCACATCCT
+GAGGATGCTCTTCGAACTCTTCAAATTCTTCTTCCATATATCACCTTAAATAGTGGATTGCGGTAGTAAAGATTGTGCCT
+GTCTTTTAACCACATCAGGCTCGGTGGTTCTCGTGTACCCCTACAGCGAGAAATCGGATAAACTATTACAACCCCTACAG
+TTTGATGAGTATAGAAATGGATCCACTCGTTATTCTCGGACGAGTGTTCAGTAATGAACCTCTGGAGAGAACCATGTATA
+TGATCGTTATCTGGGTTGGACTTCTGCTTTTAAGCCCAGATAACTGGCCTGAATATGTTAATGAGAGAATCGGTATTCCT
+CATGTGTGGCATGTTTTCGTCTTTGCTCTTGCATTTTCGCTAGCAATTAATGTGCATCGATTATCAGCTATTGCCAGCGC
+CAGATATAAGCGATTTAAGCTAAGAAAACGCATTAAGATGCAAAACGATAAAGTGCGATCAGTAATTCAAAACCTTACAG
+AAGAGCAATCTATGGTTTTGTGCGCAGCCCTTAATGAAGGCAGGAAGTATGTGGTTACATCAAAACAATTCCCATACATT
+AGTGAGTTGATTGAGCTTGGTGTGTTGAACAAAACTTTTTCCCGATGGAATGGAAAGCATATATTATTCCCTATTGAGGA
+TATTTACTGGACTGAATTAGTTGCCAGCTATGATCCATATAATATTGAGATAAAGCCAAGGCCAATATCTAAGTAACTAG
+ATAAGAGGAATCGATTTTCCCTTAATTTTCTGGCGTCCACTGCATGTTATGCCGCGTTCGCCAGGCTTGCTGTACCATGT
+GCGCTGATTCTTGCGCTCAATACGTTGCAGGTTGCTTTCAATCTGTTTGTGGTATTCAGCCAGCACTGTAAGGTCTATCG
+GATTTAGTGCGCTTTCTACTCGTGATTTCGGTTTGCGATTCAGCGAGAGAATAGGGCGGTTAACTGGTTTTGCGCTTACC
+CCAACCAACAGGGGATTTGCTGCTTTCCATTGAGCCTGTTTCTCTGCGCGACGTTCGCGGCGGCGTGTTTGTGCATCCAT
+CTGGATTCTCCTGTCAGTTAGCTTTGGTGGTGTGTGGCAGTTGTAGTCCTGAACGAAAACCCCCCGCGATTGGCACATTG
+GCAGCTAATCCGGAATCGCACTTACGGCCAATGCTTCGTTTCGTATCACACACCCCAAAGCCTTCTGCTTTGAATGCTGC
+CCTTCTTCAGGGCTTAATTTTTAAGAGCGTCACCTTCATGGTGGTCAGTGCGTCCTGCTGATGTGCTCAGTATCACCGCC
+AGTGGTATTTATGTCAACACCGCCAGAGATAATTTATCACCGCAGATGGTTATCTGTATGTTTTTTATATGAATTTATTT
+TTTGCAGGGGGGCATTGTTTGGTAGGTGAGAGATCTGAATTGCTATGTTTAGTGAGTTGTATCTATTTATTTTTCAATAA
+ATACAATTGGTTATGTGTTTTGGGGGCGATCGTGAGGCAAAGAAAACCCGGCGCTGAGGCCGGGTTATTCTTGTTCTCTG
+GTCAAATTATATAGTTGGAAAACAAGGATGCATATATGAATGAACGATGCAGAGGCAATGCCGATGGCGATAGTGGGTAT
+CATGTAGCCGCTTATGCTGGAAAGAAGCAATAACCCGCAGAAAAACAAAGCTCCAAGCTCAACAAAACTAAGGGCATAGA
+CAATAACTACCGATGTCATATACCCATACTCTCTAATCTTGGCCAGTCGGCGCGTTCTGCTTCCGATTAGAAACGTCAAG
+GCAGCAATCAGGATTGCAATCATGGTTCCTGCATATGATGACAATGTCGCCCCAAGACCATCTCTATGAGCTGAAAAAGA
+AACACCAGGAATGTAGTGGCGGAAAAGGAGATAGCAAATGCTTACGATAACGTAAGGAATTATTACTATGTAAACACCAG
+GCATGATTCTGTTCCGCATAATTACTCCTGATAATTAATCCTTAACTTTGCCCACCTGCCTTTTAAAACATTCCAGTATA
+TCACTTTTCATTCTTGCGTAGCAATATGCCATCTCTTCAGCTATCTCAGCATTGGTGACCTTGTTCAGAGGCGCTGAGAG
+ATGGCCTTTTTCTGATAGATAATGTTCTGTTAAAATATCTCCGGCCTCATCTTTTGCCCGCAGGCTAATGTCTGAAAATT
+GAGGTGACGGGTTAAAAATAATATCCTTGGCAACCTTTTTTATATCCCTTTTAAATTTTGGCTTAATGACTATATCCAAT
+GAGTCAAAAAGCTCCCCTTCAATATCTGTTGCCCCTAAGACCTTTAATATATCGCCAAATACAGGTAGCTTGGCTTCTAC
+CTTCACCGTTGTTCGGCCGATGAAATGCATATGCATAACATCGTCTTTGGTGGTTCCCCTCATCAGTGGCTCTATCTGAA
+CGCGCTCTCCACTGCTTAATGACATTCCTTTCCCGATTAAAAAATCTGTCAGATCGGATGTGGTCGGCCCGAAAACAGTT
+CTGGCAAAACCAATGGTGTCGCCTTCAACAAACAAAAAAGATGGGAATCCCAATGATTCGTCATCTGCGAGGCTGTTCTT
+AATATCTTCAACTGAAGCTTTAGAGCGATTTATCTTCTGAACCAGACTCTTGTCATTTGTTTTGGTAAAGAGAAAAGTTT
+TTCCATCGATTTTATGAATATACAAATAATTGGAGCCAACCTGCAGGTGATGATTATCAGCCAGCAGAGAATTAAGGAAA
+ACAGACAGGTTTATTGAGCGCTTATCTTTCCCTTTATTTTTGCTGCGGTAAGTCGCATAAAAACCATTCTTCATAATTCA
+ATCCATTTACTATGTTATGTTCTGAGGGGAGTGAAAATTCCCCTAATTCGATGAAGATTCTTGCTCAATTGTTATCAGCT
+ATGCGCCGACCAGAACACCTTGCCGATCAGCCAAACGTCTCTTCAGGCCACTGACTAGCGATAACTTTCCCCACAACGGA
+ACAACTCTCATTGCATGGGATCATTGGGTACTGTGGGTTTAGTGGTTGTAAAAACACCTGACCGCTATCCCTGATCAGTT
+TCTTGAAGGTAAACTCATCACCCCCAAGTCTGGCTATGCAGAAATCACCTGGCTCAACAGCCTGCTCAGGGTCAACGAGA
+ATTAACATTCCGTCAGGAAAGCTTGGCTTGGAGCCTGTTGGTGCGGTCATGGAATTACCTTCAACCTCAAGCCAGAATGC
+AGAATCACTGGCTTTTTTGGTTGTGCTTACCCATCTCTCCGCATCACCTTTGGTAAAGGTTCTAAGCTTAGGTGAGAACA
+TCCCTGCCTGAACATGAGAAAAAACAGGGTACTCATACTCACTTCTAAGTGACGGCTGCATACTAACCGCTTCATACATC
+TCGTAGATTTCTCTGGCGATTGAAGGGCTAAATTCTTCAACGCTAACTTTGAGAATTTTTGTAAGCAATGCGGCGTTATA
+AGCATTTAATGCATTGATGCCATTAAATAAAGCACCAACGCCTGACTGCCCCATCCCCATCTTGTCTGCGACAGATTCCT
+GGGATAAGCCAAGTTCATTTTTCTTTTTTTCATAAATTGCTTTAAGGCGACGTGCGTCCTCAAGCTGCTCTTGTGTTAAT
+GGTTTCTTTTTTGTGCTCATACGTTAAATCTATCACCGCAAGGGATAAATATCTAACACCGTGCGTGTTGACTATTTTAC
+CTCTGGCGGTGATAATGGTTGCATGTACTAAGGAGGTTGTATGGAACAACGCATAACCCTGAAAGATTATGCAATGCGCT
+TTGGGCAAACCAAGACAGCTAAAGATCTCGGCGTATATCAAAGCGCGATCAACAAGGCCATTCATGCAGGCCGAAAGATT
+TTTTTAACTATAAACGCTGATGGAAGCGTTTATGCGGAAGAGGTAAAGCCCTTCCCGAGTAACAAAAAAACAACAGCATA
+AATAACCCCGCTCTTACACATTCCAGCCCTGAAAAAGGGCATCAAATTAAACCACACCTATGGTGTATGCATTTATTTGC
+ATACATTCAATCAATTGTTATCTAAGGAAATACTTACATATGGTTCGTGCAAACAAACGCAACGAGGCTCTACGAATCGA
+GAGTGCGTTGCTTAACAAAATCGCAATGCTTGGAACTGAGAAGACAGCGGAAGCTGTGGGCGTTGATAAGTCGCAGATCA
+GCAGGTGGAAGAGGGACTGGATTCCAAAGTTCTCAATGCTGCTTGCTGTTCTTGAATGGGGGGTCGTTGACGACGACATG
+GCTCGATTGGCGCGACAAGTTGCTGCGATTCTCACCAATAAAAAACGCCCGGCGGCAACCGAGCGTTCTGAACAAATCCA
+GATGGAGTTCTGAGGTCATTACTGGATCTATCAACAGGAGTCATTATGACAAATACAGCAAAAATACTCAACTTCGGCAG
+AGGTAACTTTGCCGGACAGGAGCGTAATGTGGCAGATCTCGATGATGGTTACGCCAGACTATCAAATATGCTGCTTGAGG
+CTTATTCGGGCGCAGATCTGACCAAGCGACAGTTTAAAGTGCTGCTTGCCATTCTGCGTAAAACCTATGGGTGGAATAAA
+CCAATGGACAGAATCACCGATTCTCAACTTAGCGAGATTACAAAGTTACCTGTCAAACGGTGCAATGAAGCCAAGTTAGA
+ACTCGTCAGAATGAATATTATCAAGCAGCAAGGCGGCATGTTTGGACCAAATAAAAACATCTCAGAATGGTGCATCCCTC
+AAAACGAGGGAAAATCCCCTAAAACGAGGGATAAAACATCCCTCAAATTGGGGGATTGCTATCCCTCAAAACAGGGGGAC
+ACAAAAGACACTATTACAAAAGAAAAAAGAAAAGATTATTCGTCAGAGAATTCTGGCGAATCCTCTGACCAGCCAGAAAA
+CGACCTTTCTGTGGTGAAACCGGATGCTGCAATTCAGAGCGGCAGCAAGTGGGGGACAGCAGAAGACCTGACCGCCGCAG
+AGTGGATGTTTGACATGGTGAAGACTATCGCACCATCAGCCAGAAAACCGAATTTTGCTGGGTGGGCTAACGATATCCGC
+CTGATGCGTGAACGTGACGGACGTAACCACCGCGACATGTGTGTGCTGTTCCGCTGGGCATGCCAGGACAACTTCTGGTC
+CGGTAACGTGCTGAGCCCGGCCAAACTCCGCGATAAGTGGACCCAACTCGAAATCAACCGTAACAAGCAACAGGCAGGCG
+TGACAGCCAGCAAACCAAAACTCGACCTGACAAACACAGACTGGATTTACGGGGTGGATCTATGAAAAACATCGCCGCAC
+AGATGGTTAACTTTGACCGTGAGCAGATGCGTCGGATCGCCAACAACATGCCGGAACAGTACGACGAAAAGCCGCAGGTA
+CAGCAGGTAGCGCAGATCATCAACGGTGTGTTCAGCCAGTTACTGGCAACTTTCCCGGCGAGCCTGGCTAACCGTGACCA
+GAACGAAGTGAACGAAATCCGTCGCCAGTGGGTTCTGGCTTTTCGGGAAAACGGGATCACCACGATGGAACAGGTTAACG
+CAGGAATGCGCGTAGCCCGTCGGCAGAATCGACCATTTCTGCCATCACCCGGGCAGTTTGTTGCATGGTGCCGGGAAGAA
+GCATCCGTTACCGCCGGACTGCCAAACGTCAGCGAGCTGGTTGATATGGTTTACGAGTATTGCCGGAAGCGAGGCCTGTA
+TCCGGATGCGGAGTCTTATCCGTGGAAATCAAACGCGCACTACTGGCTGGTTACCAACCTGTATCAGAACATGCGGGCCA
+ATGCGCTTACTGATGCGGAATTACGCCGTAAGGCCGCAGATGAGCTTGTCCATATGACTGCGAGAATTAACCGTGGTGAG
+GCGATCCCTGAACCAGTAAAACAACTTCCTGTCATGGGCGGTAGACCTCTAAATCGTGCACAGGCTCTGGCGAAGATCGC
+AGAAATCAAAGCTAAGTTCGGACTGAAAGGAGCAAGTGTATGACGGGCAAAGAGGCAATTATTCATTACCTGGGGACGCA
+TAATAGCTTCTGTGCGCCGGACGTTGCCGCGCTAACAGGCGCAACAGTAACCAGCATAAATCAGGCCGCGGCTAAAATGG
+CACGGGCAGGTCTTCTGGTTATCGAAGGTAAGGTCTGGCGAACGGTGTATTACCGGTTTGCTACCAGGGAAGAACGGGAA
+GGAAAGATGAGCACGAACCTGGTTTTTAAGGAGTGTCGCCAGAGTGCCGCGATGAAACGGGTATTGGCGGTATATGGAGT
+TAAAAGATGACCATCTACATTACTGAGCTAATAACAGGCCTGCTGGTAATCGCAGGCCTTTTTATTTGGGGGAGAGGGAA
+GTCATGAAAAAACTAACCTTTGAAATTCGATCTCCAGCACATCAGCAAAACGCTATTCACGCAGTACAGCAAATCCTTCC
+AGACCCAACCAAACCAATCGTAGTAACCATTCAGGAACGCAACCGCAGCTTAGACCAAAACAGGAAGCTATGGGCCTGCT
+TAGGTGACGTCTCTCGTCAGGTTGAATGGCATGGTCGCTGGCTGGATGCAGAAAGCTGGAAGTGTGTGTTTACCGCAGCA
+TTAAAGCAGCAGGATGTTGTTCCTAACCTTGCCGGGAATGGCTTTGTGGTAATAGGCCAGTCAACCAGCAGGATGCGTGT
+AGGCGAATTTGCGGAGCTATTAGAGCTTATACAGGCATTCGGTACAGAGCGTGGCGTTAAGTGGTCAGACGAAGCGAGAC
+TGGCTCTGGAGTGGAAAGCGAGATGGGGAGACAGGGCTGCATGATAAATGTCGTTAGTTTCTCCGGTGGCAGGACGTCAG
+CATATTTGCTCTGGCTAATGGAGCAAAAGCGACGGGCAGGTAAAGACGTGCATTACGTTTTCATGGATACAGGTTGTGAA
+CATCCAATGACATATCGGTTTGTCAGGGAAGTTGTGAAGTTCTGGGATATACCGCTCACCGTATTGCAGGTTGATATCAA
+CCCGGAGCTTGGACAGCCAAATGGTTATACGGTATGGGAACCAAAGGATATTCAGACGCGAATGCCTGTTCTGAAGCCAT
+TTATCGATATGGTAAAGAAATATGGCACTCCATACGTCGGCGGCGCGTTCTGCACTGACAGATTAAAACTCGTTCCCTTC
+ACCAAATACTGTGATGACCATTTCGGGCGAGGGAATTACACCACGTGGATTGGCATCAGAGCTGATGAACCGAAGCGGCT
+AAAGCCAAAGCCTGGAATCAGATATCTTGCTGAACTGTCAGACTTTGAGAAGGAAGATATCCTCGCATGGTGGAAGCAAC
+AACCATTCGATTTGCAAATACCGGAACATCTCGGTAACTGCATATTCTGCATTAAAAAATCAACGCAAAAAATCGGACTT
+GCCTGCAAAGATGAGGAGGGATTGCAGCGTGTTTTTAATGAGGTCATCACGGGATCCCATGTGCGTGACGGACATCGGGA
+AACGCCAAAGGAGATTATGTACCGAGGAAGAATGTCGCTGGACGGTATCGCGAAAATGTATTCAGAAAATGATTATCAAG
+CCCTGTATCAGGACATGGTACGAGCTAAAAGATTCGATACCGGCTCTTGTTCTGAGTCATGCGAAATATTTGGAGGGCAG
+CTTGATTTCGACTTCGGGAGGGAAGCTGCATGATGCGATGTTATCGGTGCGGTGAATGCAAAGAAGATAACCGCTTCCGA
+CCAAATCAACCTTACTGGAATCGATGGTGTCTCCGGTGTGAAAGAACACCAACAGGGGTGTTACCACTACCGCAGGAAAA
+GGAGGACGTGTGGCGAGACAGCGACGAAGTATCACCGACATAATCTGCGAAAACTGCAAATACCTTCCAACGAAACGCAC
+CAGAAATAAACCCAAGCCAATCCCAAAAGAATCTGACGTAAAAACCTTCAACTACACGGCTCACCTGTGGGATATCCGGT
+GGCTAAGACGTCGTGCGAGGAAAACAAGGTGATTGACCAAAATCGAAGTTACGAACAAGAAAGCGTCGAGCGAGCTTTAA
+CGTGCGCTAACTGCGGTCAGAAGCTGCATGTGCTGGAAGTTCACGTGTGTGAGCACTGCTGCGCAGAACTGATGAGCGAT
+CCGAATAGCTCGATGCACGAGGAAGAAGATGATGGCTAAACCAGCGCGAAGACGATGTAAAAACGATGAATGCCGGGAAT
+GGTTTCACCCTGCATTCGCTAATCAGTGGTGGTGCTCTCCAGAGTGTGGAACCAAGATAGCACTCGAACGACGAAGTAAA
+GAACGCGAAAAAGCGGAAAAAGCAGCAGAGAAGAAACGACGACGAGAGGAGCAGAAACAGAAAGATAAACTTAAGATTCG
+AAAACTCGCCTTAAAGCCCCGCAGTTACTGGATTAAACAAGCCCAACAAGCCGTAAACGCCTTCATCAGAGAAAGAGACC
+GCGACTTACCATGTATCTCGTGCGGAACGCTCACGTCTGCTCAGTGGGATGCCGGACATTACCGGACAACTGCTGCGGCA
+CCTCAACTCCGATTTAATGAACGCAATATTCACAAGCAATGCGTGGTGTGCAACCAGCACAAAAGCGGAAATCTCGTTCC
+GTATCGCGTCGAACTGATTAGCCGCATCGGGCAGGAAGCAGTAGACGAAATCGAATCAAACCATAACCGCCATCGCTGGA
+CTATCGAAGAGTGCAAGGCGATCAAGGCAGAGTACCAACAGAAACTCAAAGACCTGCGAAATAGCAGAAGTGAGGCCGCA
+TGACGTTCTCAGTAAAAACCATTCCAGACATGCTCGTTGAAACATACGGAAATCAGACAGAAGTAGCACGCAGACTGAAA
+TGTAGTCGCGGTACGGTCAGAAAATACGTTGATGATAAAGACGGGAAAATGCACGCCATCGTCAACGACGTTCTCATGGT
+TCATCGCGGATGGAGTGAAAGAGATGCGCTATTACGAAAAAATTGATGGCAGCAAATACCGAAATATTTGGGTAGTTGGC
+GATCTGCACGGATGCTACACGAACCTGATGAACAAACTGGATACGATTGGATTCGACAACAAAAAAGACCTGCTTATCTC
+GGTGGGCGATTTGGTTGATCGTGGTGCAGAGAACGTTGAATGCCTGGAATTAATCACATTCCCCTGGTTCAGAGCTGTAC
+GTGGAAACCATGAGCAAATGATGATTGATGGCTTATCAGAGCGTGGAAACGTTAATCACTGGCTGCTTAATGGCGGTGGC
+TGGTTCTTTAATCTCGATTACGACAAAGAAATTCTGGCTAAAGCTCTTGCCCATAAAGCAGATGAACTTCCGTTAATCAT
+CGAACTGGTGAGCAAAGATAAAAAATATGTTATCTGCCACGCCGATTATCCCTTTGACGAATACGAGTTTGGAAAGCCAG
+TTGATCATCAGCAGGTAATCTGGAACCGCGAACGAATCAGCAACTCACAAAACGGGATCGTGAAAGAAATCAAAGGCGCG
+GACACGTTCATCTTTGGTCATACGCCAGCAGTGAAACCACTCAAGTTTGCCAACCAAATGTATATCGATACCGGCGCAGT
+GTTCTGCGGAAACCTAACATTGATTCAGGTACAGGGAGAAGGCGCATGAGACTCGAAAGCGTAGCTAAATTTCATTCGCC
+AAAAAGCCCGATGATGAGCGACTCACCACGGGCCACGGCTTCTGACTCTCTTTCCGGTACTGATGTGATGGCTGCTATGG
+GGATGGCGCAATCACAAGCCGGATTCGGTATGGCTGCATTCTGCGGTAAGCACGAACTCAGCCAGAACGACAAACAAAAG
+GCTATCAACTATCTGATGCAATTTGCACACAAGGTATCGGGGAAATACCGTGGTGTGGCAAAGCTTGAAGGAAATACTAA
+GGCAAAGGTACTGCAAGTGCTCGCAACATTCGCTTATGCGGATTATTGCCGTAGTGCCGCGACGCCGGGGGCAAGATGCA
+GAGATTGCCATGGTACAGGCCGTGCGGTTGATATTGCCAAAACAGAGCTGTGGGGGAGAGTTGTCGAGAAAGAGTGCGGA
+AGATGCAAAGGCGTCGGCTATTCAAGGATGCCAGCAAGCGCAGCATATCGCGCTGTGACGATGCTAATCCCAAACCTTAC
+CCAACCCACCTGGTCACGCACTGTTAAGCCGCTGTATGACGCTCTGGTGGTGCAATGCCACAAAGAAGAGTCAATCGCAG
+ACAACATTTTGAATGCGGTCACACGTTAGCAGCATGATTGCCACGGATGGCAACATATTAACGGCATGATATTGACTTAT
+TGAATAAAATTGGGTAAATTTGACTCAACGATGGGTTAATTCGCTCGTTGTGGTAGTGAGATGAAAAGAGGCGGCGCTTA
+CTACCGATTCCGCCTAGTTGGTCACTTCGACGTATCGTCTGGAACTCCAACCATCGCAGGCAGAGAGGTCTGCAAAATGC
+AATCCCGAAACAGTTCGCAGGTAATAGTTAGAGCCTGCATAACGGTTTCGGGATTTTTTATATCTGCACAACAGGTAAGA
+GCATTGAGTCGATAATCGTGAAGAGTCGGCGAGCCTGGTTAGCCAGTGCTCTTTCCGTTGTGCTGAATTAAGCGAATACC
+GGAAGCAGAACCGGATCACCAAATGCGTACAGGCGTCATCGCCGCCCAGCAACAGCACAACCCAAACTGAGCCGTAGCCA
+CTGTCTGTCCTGAATTCATTAGTAATAGTTACGCTGCGGCCTTTTACACATGACCTTCGTGAAAGCGGGTGGCAGGAGGT
+CGCGCTAACAACCTCCTGCCGTTTTGCCCGTGCATATCGGTCACGAACAAATCTGATTACTAAACACAGTAGCCTGGATT
+TGTTCTATCAGTAATCGACCTTATTCCTAATTAAATAGAGCAAATCCCCTTATTGGGGGTAAGACATGAAGATGCCAGAA
+AAACATGACCTGTTGGCCGCCATTCTCGCGGCAAAGGAACAAGGCATCGGGGCAATCCTTGCGTTTGCAATGGCGTACCT
+TCGCGGCAGATATAATGGCGGTGCGTTTACAAAAACAGTAATCGACGCAACGATGTGCGCCATTATCGCCTAGTTCATTC
+GTGACCTTCTCGACTTCGCCGGACTAAGTAGCAATCTCGCTTATATAACGAGCGTGTTTATCGGCTACATCGGTACTGAC
+TCGATTGGTTCGCTTATCAAACGCTTCGCTGCTAAAAAAGCCGGAGTAGAAGATGGTAGAAATCAATAATCAACGTAAGG
+CGTTCCTCGATATGCTGGCGTGGTCGGAGGGAACTGATAACGGACGTCAGAAAACCAGAAATCATGGTTATGACGTCATT
+GTAGGCGGAGAGCTATTTACTGATTACTCCGATCACCCTCGCAAACTTGTCACGCTAAACCCAAAACTCAAATCAACAGG
+CGCCGGACGCTACCAGCTTCTTTCCCGTTGGTGGGATGCCTACCGCAAGCAGCTTGGCCTGAAAGACTTCTCTCCGAAAA
+GTCAGGACGCTGTGGCATTGCAGCAGATTAAGGAGCGTGGCGCTTTACCTATGATTGATCGTGGTGATATCCGTCAGGCA
+ATCGACCGTTGCAGCAATATCTGGGCTTCACTGCCGGGCGCTGGTTATGGTCAGTTCGAGCATAAGGCTGACAGCCTGAT
+TGCAAAATTCAAAGAAGCGGGCGGAACGGTCAGAGAGATTGATGTATGAGCAGAGTCACCGCGATTATCTCCGCTCTGGT
+TATCTGCATCATCGTCTGCCTGTCATGGGCTGTTAATCATTACCGTGATAACGCCATTACCTACAAAGCCCAGCGCGACA
+AAAATGCCAGAGAACTGAAGCTGGCGAACGCGGCAATTACTGACATGCAGATGCGTCAGCGTGATGTTGCTGCGCTCGAT
+GCAAAATACACGAAGGAGTTAGCTGATGCTAAAGCTGAAAATGATGCTCTGCGTGATGATGTTGCCGCTGGTCGTCGTCG
+GTTGCACATCAAAGCAGTCTGTCAGTCAGTGCGTGAAGCCACCACCGCCTCCGGCGTGGATAATGCAGCCTCCCCCCGAC
+TGGCAGACACCGCTGAACGGGATTATTTCACCCTCAGAGAGAGGCTGATCACTATGCAAAAACAACTGGAAGGAACCCAG
+AAGTATATTAATGAGCAGTGCAGATAGAGTTGCCCATATCGATGGGCAACTCATGCAATTATTGTGAGCAATACACACGC
+GCTTCCAGCGGAGTATAAATGCCTAAAGTAATAAAACCGAGCAATCCATTTACGAATGTTTGCTGGGTTTCTGTTTTAAC
+AACATTTTCTGCGCCGCCACAAATTTTGGCTGCATCGACAGTTTTCTTCTGCCCAATTCCAGAAACGAAGAAATGATGGG
+TGATGGTTTCCTTTGGTGCTACTGCTGCCGGTTTGTTTTGAACAGTAAACGTCTGTTGAGCACATCCTGTAATAAGCAGG
+GCCAGCGCAGTAGCGAGTAGCATTTTTTTCATGGTGTTATTCCCGATGCTTTTTGAAGTTCGCAGAATCGTATGTGTAGA
+AAATTAAACAAACCCTAAACAATGAGTTGAAATTTCATATTGTTAATATTTATTAATGTATGTCAGGTGCGATGAATCGT
+CATTGTATTCCCGGATTAACTATGTCCACAGCCCTGACGGGGAACTTCTCTGCGGGAGTGTCCGGGAATAATTAAAACGA
+TGCACACAGGGTTTAGCGCGTACACGTATTGCATTATGCCAACGCCCCGGTGCTGACACGGAAGAAACCGGACGTTATGA
+TTTAGCGTGGAAAGATTTGTGTAGTGTTCTGAATGCTCTCAGTAAATAGTAATGAATTATCAAAGGTATAGTAATATCTT
+TTATGTTCATGGATATTTGTAACCCATCGGAAAACTCCTGCTTTAGCAAGATTTTCCCTGTATTGCTGAAATGTGATTTC
+TCTTGATTTCAACCTATCATAGGACGTTTCTATAAGATGCGTGTTTCTTGAGAATTTAACATTTACAACCTTTTTAAGTC
+CTTTTATTAACACGGTGTTATCGTTTTCTAACACGATGTGAATATTATCTGTGGCTAGATAGTAAATATAATGTGAGACG
+TTGTGACGTTTTAGTTCAGAATAAAACAATTCACAGTCTAAATCTTTTCGCACTTGATCGAATATTTCTTTAAAAATGGC
+AACCTGAGCCATTGGTAAAACCTTCCATGTGATACGAGGGCGCGTAGTTTGCATTATCGTTTTTATCGTTTCAATCTGGT
+CTGACCTCCTTGTGTTTTGTTGATGATTTATGTCAAATATTAGGAATGTTTTCACTTAATAGTATTGGTTGCGTAACAAA
+GTGCGGTCCTGCTGGCATTCTGGAGGGAAATACAACCGACAGATGTATGTAAGGCCAACGTGCTCAAATCTTCATACAGA
+AAGATTTGAAGTAATATTTTAACCGCTAGATGAAGAGCAAGCGCATGGAGCGACAAAATGAATAAAGAACAATCTGCTGA
+TGATCCCTCCGTGGATCTGATTCGTGTAAAAAATATGCTTAATAGCACCATTTCTATGAGTTACCCTGATGTTGTAATTG
+CATGTATAGAACATAAGGTGTCTCTGGAAGCATTCAGAGCAATTGAGGCAGCGTTGGTGAAGCACGATAATAATATGAAG
+GATTATTCCCTGGTGGTTGACTGATCACCATAACTGCTAATCATTCAAACTATTTAGTCTGTGACAGAGCCAACACGCAG
+TCTGTCACTGTCAGGAAAGTGGTAAAACTGCAACTCAATTACTGCAATGCCCTCGTAATTAAGTGAATTTACAATATCGT
+CCTGTTCGGAGGGAAGAACGCGGGATGTTCATTCTTCATCACTTTTAATTGATGTATATGCTCTCTTTTCTGACGTTAGT
+CTCCGACGGCAGGCTTCAATGACCCAGGCTGAGAAATTCCCGGACCCTTTTTGCTCAAGAGCGATGTTAATTTGTTCAAT
+CATTTGGTTAGGAAAGCGGATGTTGCGGGTTGTTGTTCTGCGGGTTCTGTTCTTCGTTGACATGAGGTTGCCCCGTATTC
+AGTGTCGCTGATTTGTATTGTCTGAAGTTGTTTTTACGTTAAGTTGATGCAGATCAATTAATACGATACCTGCGTCATAA
+TTGATTATTTGACGTGGTTTGATGGCCTCCACGCACGTTGTGATATGTAGATGATAATCATTATCACTTTACGGGTCCTT
+TCCGGTGATCCGACAGGTTACG
--- /dev/null
+lambda_NEB3011 48502 16 80 81
--- /dev/null
+aligned.bam
+aligned2.bam
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c"
+ TimeStampedName="subreadset_150304_231155"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Name="DataSet_SubreadSet"
+ Version="3.0.0"
+ CreatedAt="2015-01-27T09:00:01">
+<pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="production.subreads.bam">
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5195"
+ TimeStampedName="scraps_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.ScrapsBamFile"
+ ResourceId="production.scraps.bam">
+ </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="qStart" Value="4000" Operator=">"/>
+ <pbbase:Property Name="qStart" Value="5000" Operator="&lt;"/>
+ </pbbase:Properties>
+ </pbds:Filter>
+</pbds:Filters>
+</pbds:SubreadSet>
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c"
+ TimeStampedName="subreadset_150304_231155"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Name="DataSet_SubreadSet"
+ Version="3.0.0"
+ CreatedAt="2015-01-27T09:00:01">
+<pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="./production.subreads.bam">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5195"
+ TimeStampedName="scraps_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.ScrapsBamFile"
+ ResourceId="./production.scraps.bam">
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="./internal.subreads.bam">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5195"
+ TimeStampedName="scraps_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.ScrapsBamFile"
+ ResourceId="./internal.scraps.bam">
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5197"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.HqRegionBamFile"
+ ResourceId="./production_hq.hqregion.bam">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5199"
+ TimeStampedName="scraps_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.HqScrapsBamFile"
+ ResourceId="./production_hq.scraps.bam">
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="zm" Value="100000" Operator="=="/>
+ </pbbase:Properties>
+ </pbds:Filter>
+</pbds:Filters>
+</pbds:SubreadSet>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c"
+ TimeStampedName="subreadset_150304_231155"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Name="DataSet_SubreadSet"
+ Version="3.0.0"
+ CreatedAt="2015-01-27T09:00:01">
+<pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="./production.subreads.bam">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5195"
+ TimeStampedName="scraps_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.ScrapsBamFile"
+ ResourceId="./production.scraps.bam">
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5197"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.HqRegionBamFile"
+ ResourceId="./production_hq.hqregion.bam">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5199"
+ TimeStampedName="scraps_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.HqScrapsBamFile"
+ ResourceId="./production_hq.scraps.bam">
+ </pbbase:ExternalResource>
+ </pbbase:ExternalResources>
+ </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+</pbds:SubreadSet>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c"
+ TimeStampedName="subreadset_150304_231155"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Name="DataSet_SubreadSet"
+ Version="3.0.0"
+ CreatedAt="2015-01-27T09:00:01">
+<pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="production.subreads.bam">
+ </pbbase:ExternalResource>
+ <pbbase:ExternalResource
+ UniqueId="b096d0a3-94b8-4918-b3af-a3f81bbe5195"
+ TimeStampedName="scraps_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.ScrapsBamFile"
+ ResourceId="production.scraps.bam">
+ </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="qname_file" Value="qname_whitelist.txt" Operator="="/>
+ </pbbase:Properties>
+ </pbds:Filter>
+</pbds:Filters>
+</pbds:SubreadSet>
--- /dev/null
+ArminsFakeMovie/0/3116_3628
+ArminsFakeMovie/0/3722_4267
+ArminsFakeMovie/0/6812_7034
--- /dev/null
+a/test.bam
+b/test1.bam
+b/test2.bam
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" MetaType="PacBio.DataSet.AlignmentSet" Name="DataSet_AlignmentSet" Tags="barcode moreTags mapping mytags" UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c" Version="2.3.0" CreatedAt="2015-01-27T09:00:01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+ <pbbase:ExternalResources>
+ <pbbase:ExternalResource MetaType="SubreadFile.SubreadBamFile" ResourceId="./a/test.bam" />
+ <pbbase:ExternalResource MetaType="SubreadFile.SubreadBamFile" ResourceId="./b/test1.bam" />
+ <pbbase:ExternalResource MetaType="SubreadFile.SubreadBamFile" ResourceId="./b/test2.bam"/>
+ </pbbase:ExternalResources>
+</pbds:SubreadSet>
--- /dev/null
+a/test.bam
+b/test1.bam
+b/test2.bam
+relative.xml
--- /dev/null
+@PacBioBAM_TestsDir@/data/test_group_query/test1.bam
+@PacBioBAM_TestsDir@/data/test_group_query/test2.bam
+@PacBioBAM_TestsDir@/data/test_group_query/test3.bam
--- /dev/null
+# Test case headers (the list is currently empty).
+set( PacBioBAMTest_H
+
+)
+
+# Test case sources: one test_*.cpp file per entry, all located under
+# ${PacBioBAM_TestsDir}/src.
+set( PacBioBAMTest_CPP
+
+ ${PacBioBAM_TestsDir}/src/test_Accuracy.cpp
+ ${PacBioBAM_TestsDir}/src/test_AlignmentPrinter.cpp
+ ${PacBioBAM_TestsDir}/src/test_BamFile.cpp
+ ${PacBioBAM_TestsDir}/src/test_BamHeader.cpp
+ ${PacBioBAM_TestsDir}/src/test_BamRecord.cpp
+ ${PacBioBAM_TestsDir}/src/test_BamRecordBuilder.cpp
+ ${PacBioBAM_TestsDir}/src/test_BamRecordClipping.cpp
+ ${PacBioBAM_TestsDir}/src/test_BamRecordImplCore.cpp
+ ${PacBioBAM_TestsDir}/src/test_BamRecordImplTags.cpp
+ ${PacBioBAM_TestsDir}/src/test_BamRecordImplVariableData.cpp
+ ${PacBioBAM_TestsDir}/src/test_BamRecordMapping.cpp
+ ${PacBioBAM_TestsDir}/src/test_BamWriter.cpp
+ ${PacBioBAM_TestsDir}/src/test_BarcodeQuery.cpp
+ ${PacBioBAM_TestsDir}/src/test_Cigar.cpp
+ ${PacBioBAM_TestsDir}/src/test_Compare.cpp
+ ${PacBioBAM_TestsDir}/src/test_DataSetCore.cpp
+ ${PacBioBAM_TestsDir}/src/test_DataSetIO.cpp
+ ${PacBioBAM_TestsDir}/src/test_DataSetQuery.cpp
+ ${PacBioBAM_TestsDir}/src/test_DataSetXsd.cpp
+ ${PacBioBAM_TestsDir}/src/test_EndToEnd.cpp
+ ${PacBioBAM_TestsDir}/src/test_EntireFileQuery.cpp
+ ${PacBioBAM_TestsDir}/src/test_Fasta.cpp
+ ${PacBioBAM_TestsDir}/src/test_FileUtils.cpp
+ ${PacBioBAM_TestsDir}/src/test_Frames.cpp
+ ${PacBioBAM_TestsDir}/src/test_GenomicIntervalQuery.cpp
+ ${PacBioBAM_TestsDir}/src/test_IndexedFastaReader.cpp
+ ${PacBioBAM_TestsDir}/src/test_Intervals.cpp
+ ${PacBioBAM_TestsDir}/src/test_PacBioIndex.cpp
+ ${PacBioBAM_TestsDir}/src/test_PbiFilter.cpp
+ ${PacBioBAM_TestsDir}/src/test_PbiFilterQuery.cpp
+ ${PacBioBAM_TestsDir}/src/test_QNameQuery.cpp
+ ${PacBioBAM_TestsDir}/src/test_QualityValues.cpp
+ ${PacBioBAM_TestsDir}/src/test_Pulse2BaseCache.cpp
+ ${PacBioBAM_TestsDir}/src/test_ReadAccuracyQuery.cpp
+ ${PacBioBAM_TestsDir}/src/test_ReadGroupInfo.cpp
+ ${PacBioBAM_TestsDir}/src/test_SamWriter.cpp
+ ${PacBioBAM_TestsDir}/src/test_SequenceUtils.cpp
+ ${PacBioBAM_TestsDir}/src/test_StringUtils.cpp
+ ${PacBioBAM_TestsDir}/src/test_SubreadLengthQuery.cpp
+ ${PacBioBAM_TestsDir}/src/test_Tags.cpp
+ ${PacBioBAM_TestsDir}/src/test_TimeUtils.cpp
+ # NOTE(review): the UnmappedReadsQuery test below is commented out, so it
+ # is not part of the list; the reason is not recorded here.
+ # ${PacBioBAM_TestsDir}/src/test_UnmappedReadsQuery.cpp
+ ${PacBioBAM_TestsDir}/src/test_Validator.cpp
+ ${PacBioBAM_TestsDir}/src/test_Version.cpp
+ ${PacBioBAM_TestsDir}/src/test_WhitelistedZmwReadStitcher.cpp
+ ${PacBioBAM_TestsDir}/src/test_ZmwReadStitcher.cpp
+ ${PacBioBAM_TestsDir}/src/test_ZmwQuery.cpp
+)
--- /dev/null
+#!/usr/bin/env python
+"""Functional testing framework for command line applications"""
+
+import difflib
+import itertools
+import optparse
+import os
+import re
+import signal
+import subprocess
+import sys
+import shutil
+import time
+import tempfile
+
+try:
+ import configparser
+except ImportError:
+ import ConfigParser as configparser
+
+# Names exported by a star-import of this module.
+__all__ = ['main', 'test']
+
+def findtests(paths):
+ """Yield tests in paths in sorted order"""
+ # Generator: the entries of paths are handled in the order given.
+ for p in paths:
+ if os.path.isdir(p):
+ for root, dirs, files in os.walk(p):
+ # Skip the files that sit directly in a directory whose name starts
+ # with '.'. dirs is never pruned, so os.walk() still descends into
+ # the subdirectories of such a directory.
+ if os.path.basename(root).startswith('.'):
+ continue
+ # Only the file names within one directory are sorted; the order in
+ # which directories are visited is whatever os.walk() produces.
+ for f in sorted(files):
+ # A test is a non-hidden file whose name ends in '.t'.
+ if not f.startswith('.') and f.endswith('.t'):
+ yield os.path.normpath(os.path.join(root, f))
+ # NOTE(review): indentation is flattened in this patch; this else
+ # presumably pairs with the os.path.isdir(p) check, so a non-directory
+ # path is yielded (normalized) without any '.t' or existence check.
+ else:
+ yield os.path.normpath(p)
+
+def regex(pattern, s):
+ """Match a regular expression or return False if invalid.
+
+ >>> [bool(regex(r, 'foobar')) for r in ('foo.*', '***')]
+ [True, False]
+ """
+ try:
+ # re.match() anchors at the start of s and the appended \Z anchors at
+ # the end, so the pattern has to cover the whole string.
+ return re.match(pattern + r'\Z', s)
+ except re.error:
+ # The pattern does not compile: report it as "no match".
+ return False
+
+def glob(el, l):
+ r"""Match a glob-like pattern.
+
+ The only supported special characters are * and ?. Escaping is
+ supported.
+
+ >>> bool(glob(r'\* \\ \? fo?b*', '* \\ ? foobar'))
+ True
+ """
+ i, n = 0, len(el)
+ res = ''
+ while i < n:
+ c = el[i]
+ i += 1
+ if c == '\\' and el[i] in '*?\\':
+ res += el[i - 1:i + 1]
+ i += 1
+ elif c == '*':
+ res += '.*'
+ elif c == '?':
+ res += '.'
+ else:
+ res += re.escape(c)
+ return regex(res, l)
+
+annotations = {'glob': glob, 're': regex}
+
+def match(el, l):
+ """Match patterns based on annotations"""
+ for k in annotations:
+ ann = ' (%s)\n' % k
+ if el.endswith(ann) and annotations[k](el[:-len(ann)], l[:-1]):
+ return True
+ return False
+
+class SequenceMatcher(difflib.SequenceMatcher, object):
+ """Like difflib.SequenceMatcher, but matches globs and regexes"""
+
+ def find_longest_match(self, alo, ahi, blo, bhi):
+ """Find longest matching block in a[alo:ahi] and b[blo:bhi]"""
+ # SequenceMatcher uses find_longest_match() to slowly whittle down
+ # the differences between a and b until it has each matching block.
+ # Because of this, we can end up doing the same matches many times.
+ matches = []
+ for n, (el, line) in enumerate(zip(self.a[alo:ahi], self.b[blo:bhi])):
+ if el != line and match(el, line):
+ # This fools the superclass's method into thinking that the
+ # regex/glob in a is identical to b by replacing a's line (the
+ # expected output) with b's line (the actual output).
+ self.a[alo + n] = line
+ matches.append((n, el))
+ ret = super(SequenceMatcher, self).find_longest_match(alo, ahi,
+ blo, bhi)
+ # Restore the lines replaced above. Otherwise, the diff output
+ # would seem to imply that the tests never had any regexes/globs.
+ for n, el in matches:
+ self.a[alo + n] = el
+ return ret
+
+def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
+ tofiledate='', n=3, lineterm='\n', matcher=SequenceMatcher):
+ """Compare two sequences of lines; generate the delta as a unified diff.
+
+ This is like difflib.unified_diff(), but allows custom matchers.
+ """
+ started = False
+ for group in matcher(None, a, b).get_grouped_opcodes(n):
+ if not started:
+ fromdate = fromfiledate and '\t%s' % fromfiledate or ''
+ todate = fromfiledate and '\t%s' % tofiledate or ''
+ yield '--- %s%s%s' % (fromfile, fromdate, lineterm)
+ yield '+++ %s%s%s' % (tofile, todate, lineterm)
+ started = True
+ i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
+ yield "@@ -%d,%d +%d,%d @@%s" % (i1 + 1, i2 - i1, j1 + 1, j2 - j1,
+ lineterm)
+ for tag, i1, i2, j1, j2 in group:
+ if tag == 'equal':
+ for line in a[i1:i2]:
+ yield ' ' + line
+ continue
+ if tag == 'replace' or tag == 'delete':
+ for line in a[i1:i2]:
+ yield '-' + line
+ if tag == 'replace' or tag == 'insert':
+ for line in b[j1:j2]:
+ yield '+' + line
+
+needescape = re.compile(r'[\x00-\x09\x0b-\x1f\x7f-\xff]').search
+escapesub = re.compile(r'[\x00-\x09\x0b-\x1f\\\x7f-\xff]').sub
+escapemap = dict((chr(i), r'\x%02x' % i) for i in range(256))
+escapemap.update({'\\': '\\\\', '\r': r'\r', '\t': r'\t'})
+
+def escape(s):
+ """Like the string-escape codec, but doesn't escape quotes"""
+ return escapesub(lambda m: escapemap[m.group(0)], s[:-1]) + ' (esc)\n'
+
+def makeresetsigpipe():
+ """Make a function to reset SIGPIPE to SIG_DFL (for use in subprocesses).
+
+ Doing subprocess.Popen(..., preexec_fn=makeresetsigpipe()) will prevent
+ Python's SIGPIPE handler (SIG_IGN) from being inherited by the
+ child process.
+ """
+ if sys.platform == 'win32' or getattr(signal, 'SIGPIPE', None) is None:
+ return None
+ return lambda: signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+
+def test(path, shell, indent=2):
+ """Run test at path and return input, output, and diff.
+
+ This returns a 3-tuple containing the following:
+
+ (list of lines in test, same list with actual output, diff)
+
+ diff is a generator that yields the diff between the two lists.
+
+ If a test exits with return code 80, the actual output is set to
+ None and diff is set to [].
+ """
+ indent = ' ' * indent
+ cmdline = '%s$ ' % indent
+ conline = '%s> ' % indent
+
+ f = open(path)
+ abspath = os.path.abspath(path)
+ env = os.environ.copy()
+ env['TESTDIR'] = os.path.dirname(abspath)
+ env['TESTFILE'] = os.path.basename(abspath)
+ p = subprocess.Popen([shell, '-'], bufsize=-1, stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+ universal_newlines=True, env=env,
+ preexec_fn=makeresetsigpipe(),
+ close_fds=os.name == 'posix')
+ salt = 'CRAM%s' % time.time()
+
+ after = {}
+ refout, postout = [], []
+ i = pos = prepos = -1
+ stdin = []
+ for i, line in enumerate(f):
+ refout.append(line)
+ if line.startswith(cmdline):
+ after.setdefault(pos, []).append(line)
+ prepos = pos
+ pos = i
+ stdin.append('echo "\n%s %s $?"\n' % (salt, i))
+ stdin.append(line[len(cmdline):])
+ elif line.startswith(conline):
+ after.setdefault(prepos, []).append(line)
+ stdin.append(line[len(conline):])
+ elif not line.startswith(indent):
+ after.setdefault(pos, []).append(line)
+ stdin.append('echo "\n%s %s $?"\n' % (salt, i + 1))
+
+ output = p.communicate(input=''.join(stdin))[0]
+ if p.returncode == 80:
+ return (refout, None, [])
+
+ # Add a trailing newline to the input script if it's missing.
+ if refout and not refout[-1].endswith('\n'):
+ refout[-1] += '\n'
+
+ # We use str.split instead of splitlines to get consistent
+ # behavior between Python 2 and 3. In 3, we use unicode strings,
+ # which has more line breaks than \n and \r.
+ pos = -1
+ ret = 0
+ for i, line in enumerate(output[:-1].split('\n')):
+ line += '\n'
+ if line.startswith(salt):
+ presalt = postout.pop()
+ if presalt != '%s\n' % indent:
+ postout.append(presalt[:-1] + ' (no-eol)\n')
+ ret = int(line.split()[2])
+ if ret != 0:
+ postout.append('%s[%s]\n' % (indent, ret))
+ postout += after.pop(pos, [])
+ pos = int(line.split()[1])
+ else:
+ if needescape(line):
+ line = escape(line)
+ postout.append(indent + line)
+ postout += after.pop(pos, [])
+
+ diffpath = os.path.basename(abspath)
+ diff = unified_diff(refout, postout, diffpath, diffpath + '.err')
+ for firstline in diff:
+ return refout, postout, itertools.chain([firstline], diff)
+ return refout, postout, []
+
+def prompt(question, answers, auto=None):
+ """Write a prompt to stdout and ask for answer in stdin.
+
+ answers should be a string, with each character a single
+ answer. An uppercase letter is considered the default answer.
+
+ If an invalid answer is given, this asks again until it gets a
+ valid one.
+
+ If auto is set, the question is answered automatically with the
+ specified value.
+ """
+ default = [c for c in answers if c.isupper()]
+ while True:
+ sys.stdout.write('%s [%s] ' % (question, answers))
+ sys.stdout.flush()
+ if auto is not None:
+ sys.stdout.write(auto + '\n')
+ sys.stdout.flush()
+ return auto
+
+ answer = sys.stdin.readline().strip().lower()
+ if not answer and default:
+ return default[0]
+ elif answer and answer in answers.lower():
+ return answer
+
+def log(msg=None, verbosemsg=None, verbose=False):
+ """Write msg to standard out and flush.
+
+ If verbose is True, write verbosemsg instead.
+ """
+ if verbose:
+ msg = verbosemsg
+ if msg:
+ sys.stdout.write(msg)
+ sys.stdout.flush()
+
+def patch(cmd, diff, path):
+ """Run echo [lines from diff] | cmd -p0"""
+ p = subprocess.Popen([cmd, '-p0'], bufsize=-1, stdin=subprocess.PIPE,
+ universal_newlines=True,
+ preexec_fn=makeresetsigpipe(),
+ cwd=path,
+ close_fds=os.name == 'posix')
+ p.communicate(''.join(diff))
+ return p.returncode == 0
+
+def run(paths, tmpdir, shell, quiet=False, verbose=False, patchcmd=None,
+ answer=None, indent=2):
+ """Run tests in paths in tmpdir.
+
+ If quiet is True, diffs aren't printed. If verbose is True,
+ filenames and status information are printed.
+
+ If patchcmd is set, a prompt is written to stdout asking if
+ changed output should be merged back into the original test. The
+ answer is read from stdin. If 'y', the test is patched using patch
+ based on the changed output.
+
+ answer, when not None, is used as the automatic reply to that prompt.
+ shell and indent are passed through to test().
+
+ A summary line is written at the end. Returns True if at least one
+ test failed, False otherwise.
+ """
+ # Remember the starting directory; each test runs in its own directory
+ cwd = os.getcwd()
+ # seen: absolute paths already run (a test listed twice runs once).
+ # basenames: test file names already used for a directory under tmpdir.
+ seen = set()
+ basenames = set()
+ skipped = failed = 0
+ for i, path in enumerate(findtests(paths)):
+ abspath = os.path.abspath(path)
+ if abspath in seen:
+ continue
+ seen.add(abspath)
+
+ log(None, '%s: ' % path, verbose)
+ # Zero-length test files count as skipped
+ if not os.stat(abspath).st_size:
+ skipped += 1
+ log('s', 'empty\n', verbose)
+ else:
+ # Two tests with the same file name get distinct directories
+ basename = os.path.basename(path)
+ if basename in basenames:
+ basename = '%s-%s' % (basename, i)
+ else:
+ basenames.add(basename)
+ testdir = os.path.join(tmpdir, basename)
+ os.mkdir(testdir)
+ try:
+ os.chdir(testdir)
+ refout, postout, diff = test(abspath, shell, indent)
+ finally:
+ os.chdir(cwd)
+
+ # The actual output of a failing test is saved next to the test
+ errpath = abspath + '.err'
+ if postout is None:
+ skipped += 1
+ log('s', 'skipped\n', verbose)
+ elif not diff:
+ log('.', 'passed\n', verbose)
+ # Remove a stale .err file left by an earlier failing run
+ if os.path.exists(errpath):
+ os.remove(errpath)
+ else:
+ failed += 1
+ log('!', 'failed\n', verbose)
+ if not quiet:
+ log('\n', None, verbose)
+ errfile = open(errpath, 'w')
+ try:
+ for line in postout:
+ errfile.write(line)
+ finally:
+ errfile.close()
+ if not quiet:
+ # diff is a one-shot iterator; keep a list when it is also
+ # needed for patching below
+ if patchcmd:
+ diff = list(diff)
+ for line in diff:
+ log(line)
+ if (patchcmd and
+ prompt('Accept this change?', 'yN', answer) == 'y'):
+ if patch(patchcmd, diff, os.path.dirname(abspath)):
+ log(None, '%s: merged output\n' % path, verbose)
+ os.remove(errpath)
+ else:
+ log('%s: merge failed\n' % path)
+ log('\n', None, verbose)
+ log('# Ran %s tests, %s skipped, %s failed.\n'
+ % (len(seen), skipped, failed))
+ return bool(failed)
+
+def which(cmd):
+ """Return the patch to cmd or None if not found"""
+ for p in os.environ['PATH'].split(os.pathsep):
+ path = os.path.join(p, cmd)
+ if os.path.isfile(path) and os.access(path, os.X_OK):
+ return os.path.abspath(path)
+ return None
+
+def expandpath(path):
+ """Expands ~ and environment variables in path"""
+ return os.path.expanduser(os.path.expandvars(path))
+
+class OptionParser(optparse.OptionParser):
+ """Like optparse.OptionParser, but supports setting values through
+ CRAM= and .cramrc."""
+
+ def __init__(self, *args, **kwargs):
+ self._config_opts = {}
+ optparse.OptionParser.__init__(self, *args, **kwargs)
+
+ def add_option(self, *args, **kwargs):
+ option = optparse.OptionParser.add_option(self, *args, **kwargs)
+ if option.dest and option.dest != 'version':
+ key = option.dest.replace('_', '-')
+ self._config_opts[key] = option.action == 'store_true'
+ return option
+
+ def parse_args(self, args=None, values=None):
+ config = configparser.RawConfigParser()
+ config.read(expandpath(os.environ.get('CRAMRC', '.cramrc')))
+ defaults = {}
+ for key, isbool in self._config_opts.items():
+ try:
+ if isbool:
+ try:
+ value = config.getboolean('cram', key)
+ except ValueError:
+ value = config.get('cram', key)
+ self.error('--%s: invalid boolean value: %r'
+ % (key, value))
+ else:
+ value = config.get('cram', key)
+ except (configparser.NoSectionError, configparser.NoOptionError):
+ pass
+ else:
+ defaults[key] = value
+ self.set_defaults(**defaults)
+
+ eargs = os.environ.get('CRAM', '').strip()
+ if eargs:
+ import shlex
+ args = args or []
+ args += shlex.split(eargs)
+
+ try:
+ return optparse.OptionParser.parse_args(self, args, values)
+ except optparse.OptionValueError:
+ self.error(str(sys.exc_info()[1]))
+
+def main(args):
+ """Main entry point.
+
+ args should not contain the script name.
+
+ Returns 2 for usage errors (conflicting options, missing patch(1),
+ no tests given, nonexistent path), None after printing the version,
+ and otherwise the result of run().
+ """
+ p = OptionParser(usage='cram [OPTIONS] TESTS...', prog='cram')
+ p.add_option('-V', '--version', action='store_true',
+ help='show version information and exit')
+ p.add_option('-q', '--quiet', action='store_true',
+ help="don't print diffs")
+ p.add_option('-v', '--verbose', action='store_true',
+ help='show filenames and test status')
+ p.add_option('-i', '--interactive', action='store_true',
+ help='interactively merge changed test output')
+ p.add_option('-y', '--yes', action='store_true',
+ help='answer yes to all questions')
+ p.add_option('-n', '--no', action='store_true',
+ help='answer no to all questions')
+ p.add_option('-E', '--preserve-env', action='store_true',
+ help="don't reset common environment variables")
+ p.add_option('--keep-tmpdir', action='store_true',
+ help='keep temporary directories')
+ p.add_option('--shell', action='store', default='/bin/sh', metavar='PATH',
+ help='shell to use for running tests')
+ p.add_option('--indent', action='store', default=2, metavar='NUM',
+ type='int', help='number of spaces to use for indentation')
+ opts, paths = p.parse_args(args)
+
+ if opts.version:
+ sys.stdout.write("""Cram CLI testing framework (version 0.6)
+
+Copyright (C) 2010-2011 Brodie Rao <brodie@bitheap.org> and others
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+""")
+ return
+
+ # Each tuple is (flag, value, flag, value) for a pair that cannot be combined
+ conflicts = [('-y', opts.yes, '-n', opts.no),
+ ('-q', opts.quiet, '-i', opts.interactive)]
+ for s1, o1, s2, o2 in conflicts:
+ if o1 and o2:
+ sys.stderr.write('options %s and %s are mutually exclusive\n'
+ % (s1, s2))
+ return 2
+
+ # Interactive merging needs the patch program
+ patchcmd = None
+ if opts.interactive:
+ patchcmd = which('patch')
+ if not patchcmd:
+ sys.stderr.write('patch(1) required for -i\n')
+ return 2
+
+ if not paths:
+ sys.stdout.write(p.get_usage())
+ return 2
+
+ # Only the first missing path is reported
+ badpaths = [path for path in paths if not os.path.exists(path)]
+ if badpaths:
+ sys.stderr.write('no such file: %s\n' % badpaths[0])
+ return 2
+
+ # Fresh temporary directory, exported as CRAMTMP; the usual temp-dir
+ # variables are pointed at a 'tmp' subdirectory inside it
+ tmpdir = os.environ['CRAMTMP'] = tempfile.mkdtemp('', 'cramtests-')
+ proctmp = os.path.join(tmpdir, 'tmp')
+ os.mkdir(proctmp)
+ for s in ('TMPDIR', 'TEMP', 'TMP'):
+ os.environ[s] = proctmp
+
+ # Unless -E was given, set locale, time zone and a few other variables
+ # to fixed values
+ if not opts.preserve_env:
+ for s in ('LANG', 'LC_ALL', 'LANGUAGE'):
+ os.environ[s] = 'C'
+ os.environ['TZ'] = 'GMT'
+ os.environ['CDPATH'] = ''
+ os.environ['COLUMNS'] = '80'
+ os.environ['GREP_OPTIONS'] = ''
+
+ # Automatic answer for the merge prompt; None means ask on stdin
+ if opts.yes:
+ answer = 'y'
+ elif opts.no:
+ answer = 'n'
+ else:
+ answer = None
+
+ try:
+ return run(paths, tmpdir, opts.shell, opts.quiet, opts.verbose,
+ patchcmd, answer, opts.indent)
+ finally:
+ # The temporary directory is removed unless --keep-tmpdir was given
+ if opts.keep_tmpdir:
+ log('# Kept temporary directory: %s\n' % tmpdir)
+ else:
+ shutil.rmtree(tmpdir)
+
+# Script entry point: main()'s return value becomes the exit status, and a
+# KeyboardInterrupt (Ctrl-C) is swallowed instead of printing a traceback.
+if __name__ == '__main__':
+ try:
+ sys.exit(main(sys.argv[1:]))
+ except KeyboardInterrupt:
+ pass
--- /dev/null
+#!/usr/bin/python
+
+from __future__ import print_function
+
+import os, shutil, sys
+import StringIO
+
+fastaSeq_1 = """TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
+AACGCAGCTCCGCCCTCGCGGTGCTCTCCGGGTCTGTGCTGAGGAGAACGCAACTCCGCCGGCGCAGGCG"""
+
+fastaSeq_2 = """TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
+AACGCAGCTCCGCCCTCGCGGTGCTCTCCGGGTCTGTGCTGAGGAGAACGCAAC"""
+
+fastaSeq_3 = """TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
+ACCCTAACCCCAACCCCAACCCCAACCCCAACCCCAACCCCAACCCTAACCCCTAACCCTAACCCT"""
+
+# file creation decorator
+def fileMaker(func):
+ def inner(*args, **kwargs):
+ print(" - Creating file: %s..." % args[1], end='')
+ sys.stdout.flush()
+ retval = func(*args)
+ print("done.")
+ sys.stdout.flush()
+ return retval
+ return inner
+
+# symlink creation decorator
+def fileLinker(func):
+ def inner(*args, **kwargs):
+ print(" - Creating symlink: %s..." % args[1], end='')
+ sys.stdout.flush()
+ retval = func(*args)
+ print("done.")
+ sys.stdout.flush()
+ return retval
+ return inner
+
+# return a copy of original, minus any lines that contain an entry in blacklist
+def trimXmlElements(original, blacklist):
+ out = StringIO.StringIO()
+ for line in original.splitlines():
+ if all(x not in line for x in blacklist):
+ out.write(line + '\n')
+ result = out.getvalue()
+ out.close()
+ return result
+
+class TestDataGenerator:
+
+ def __init__(self, source, dest):
+
+ # source/destination directories
+ self.testDataDir = source
+ self.generatedDataDir = dest
+
+ # generated output files/symlinks & 'maker' functions
+ self.outputFiles = {
+ 'truncated.bam' : self.makeTruncatedBam,
+ 'chunking_emptyfilters.subreadset.xml' : self.makeChunkingXml,
+ 'chunking_missingfilters.subreadset.xml' : self.makeChunkingXml,
+ 'normal.fa' : self.makeNormalFasta
+ }
+ self.outputSymlinks = {
+ 'aligned.bam' : self.makeAlignedBamCopy,
+ 'aligned.bam.bai' : self.makeAlignedBamCopy,
+ 'aligned.bam.pbi' : self.makeAlignedBamCopy,
+ 'aligned2.bam' : self.makeAlignedBamCopy,
+ 'aligned2.bam.bai' : self.makeAlignedBamCopy,
+ 'aligned2.bam.pbi' : self.makeAlignedBamCopy,
+ 'm150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam' : self.makeChunkingSymlink,
+ 'm150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi' : self.makeChunkingSymlink,
+ 'm150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam' : self.makeChunkingSymlink,
+ 'm150404_101626_42267_c100807920800000001823174110291514_s1_p0.2.subreads.bam.pbi' : self.makeChunkingSymlink,
+ 'm150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam' : self.makeChunkingSymlink,
+ 'm150404_101626_42267_c100807920800000001823174110291514_s1_p0.3.subreads.bam.pbi' : self.makeChunkingSymlink,
+ 'missing_pbi.bam' : self.makeMissingPbiBam,
+ }
+
+ def editChunkingXml(self, outputFn, removeFiltersNode):
+ inputXmlFn = os.path.join(self.testDataDir,'chunking','chunking.subreadset.xml')
+ outputXmlFn = os.path.join(self.generatedDataDir,outputFn)
+
+ blacklist = ['pbds:Filter>', 'pbbase:Properties>', '<pbbase:Property']
+ if removeFiltersNode:
+ blacklist.append('pbds:Filters>')
+
+ inputXml = ''
+ with open(inputXmlFn, 'r') as xml_infile:
+ inputXml = xml_infile.read()
+ outputXml = trimXmlElements(inputXml, blacklist)
+ with open(outputXmlFn, 'w') as xml_outfile:
+ xml_outfile.write(outputXml)
+
+ @fileLinker
+ def makeAlignedBamCopy(self, outputFn):
+ source = os.path.join(self.testDataDir,outputFn)
+ dest = os.path.join(self.generatedDataDir, outputFn)
+ os.symlink(source, dest)
+
+ @fileLinker
+ def makeChunkingSymlink(self, outputFn):
+ source = os.path.join(self.testDataDir,'chunking', outputFn)
+ dest = os.path.join(self.generatedDataDir, outputFn)
+ os.symlink(source, dest)
+
+ @fileLinker
+ def makeMissingPbiBam(self, outputFn):
+ source = os.path.join(self.testDataDir, 'phi29.bam')
+ dest = os.path.join(self.generatedDataDir, outputFn)
+ os.symlink(source, dest)
+
+ @fileMaker
+ def makeChunkingXml(self, outputFn):
+ if outputFn == 'chunking_emptyfilters.subreadset.xml':
+ removeFiltersNode = False
+ else:
+ removeFiltersNode = True
+ self.editChunkingXml(outputFn, removeFiltersNode)
+
+ @fileMaker
+ def makeNormalFasta(self, outputFn):
+ content = ">1\n" + fastaSeq_1 + "\n>2\n" + fastaSeq_2 + "\n>3\n" + fastaSeq_3
+ dest = os.path.join(self.generatedDataDir, outputFn)
+ with open(outputFn, 'w') as fasta_out:
+ fasta_out.write(content)
+
+ @fileMaker
+ def makeTruncatedBam(self, outputFn):
+ source = os.path.join(self.testDataDir, 'phi29.bam')
+ dest = os.path.join(self.generatedDataDir, outputFn)
+ shutil.copyfile(source, dest)
+ with open(dest, 'r+b') as in_file:
+ in_file.truncate(200)
+
+ # main entry point
+ def generate(self):
+
+ # skip file if it exists
+ os.chdir(self.generatedDataDir)
+ filenames = self.outputFiles.keys()
+ for file in filenames:
+ if os.path.exists(file) :
+ del self.outputFiles[file]
+
+ # skip symlink if it exists
+ symlinks = self.outputSymlinks.keys()
+ for link in symlinks:
+ if os.path.lexists(link):
+ del self.outputSymlinks[link]
+
+ # only print message & run makers, if any files/symlinks to be created
+ # else silent success
+ if self.outputFiles or self.outputSymlinks:
+ print('Generating test data in %s ' % self.generatedDataDir)
+ for file, func in self.outputFiles.iteritems():
+ func(file)
+ for link, func in self.outputSymlinks.iteritems():
+ func(link)
+
+# script entry point
+# The first command line argument is passed as the generator's source
+# directory and the second as its destination directory.
+if __name__ == '__main__':
+ g = TestDataGenerator(sys.argv[1], sys.argv[2])
+ g.generate()
--- /dev/null
+
+
+using PacBio.BAM;
+
+/// <summary>
+/// Minimal smoke test for the C# binding: creates a BamHeader, calls ToSam()
+/// on it and prints a success banner.
+/// </summary>
+public class CheckSWIG
+{
+    public static void Main()
+    {
+        // The result is not inspected; the call only has to complete.
+        var header = new BamHeader();
+        header.ToSam();
+
+        // Banner surrounded by one empty line on each side.
+        foreach (var line in new[] { "", "pbbam SWIG binding to C# worked!", "" })
+        {
+            System.Console.WriteLine(line);
+        }
+    }
+}
--- /dev/null
+#pragma warning disable 168, 219
+
+using System;
+using System.IO;
+using System.Linq;
+using System.Collections;
+using System.Collections.Generic;
+using System.Reflection;
+
+using PacBio.BAM;
+
+namespace TestStuff
+{
+    //
+    // This approach is the best we can do for now, without requiring nunit.
+    //
+    // Each test is a static method that throws an Exception on failure and
+    // prints "<name> - OK!" on success.
+    //
+    public class TestPbbam
+    {
+        // Test data is looked up in a "TestData" directory next to the
+        // executing assembly.
+        public static readonly string DATA_DIR = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location) + "/TestData";
+        public static readonly string BAM_FILENAME_1 = Path.Combine(DATA_DIR, "test_group_query", "test1.bam");
+        public static readonly string BAM_FILENAME_2 = Path.Combine(DATA_DIR, "test_group_query", "test2.bam");
+        public static readonly string STITCHING_FN_1 = Path.Combine(DATA_DIR, "polymerase", "production.subreads.bam");
+        public static readonly string STITCHING_FN_2 = Path.Combine(DATA_DIR, "polymerase", "production.scraps.bam");
+        public static readonly string FASTA_FILENAME = Path.Combine(DATA_DIR, "lambdaNEB.fa");
+
+        public TestPbbam ()
+        {
+        }
+
+        // Opening a nonexistent BAM file must surface as a managed exception.
+        public static void TestExceptions()
+        {
+            try
+            {
+                var badFile = new BamFile("nonexistent.bam");
+            }
+            catch (Exception e)
+            {
+                //Console.Write(e.ToString());
+                Console.WriteLine("Exceptions - OK!");
+                return;
+            }
+            throw new Exception("doh!");
+        }
+
+
+        // A CIGAR string must survive a round trip through CigarType.
+        public static void TestCigar()
+        {
+            string s = "3=3I3D";
+            var c = new CigarType(s);
+            string cs = c.ToStdString();
+            if (s != cs)
+            {
+                throw new Exception("Cigar not working!");
+            }
+
+            // This used to crash
+            var c2 = CigarType.FromStdString("3=3I3D");
+
+            Console.WriteLine("TestCigar - OK!");
+        }
+
+        // Enumerating all records of the second test BAM must yield 4 records.
+        public static void TestBamFileEnumeration()
+        {
+            var bf = new BamFile(BAM_FILENAME_2);
+            var q = new EntireFileQuery(new DataSet(bf));
+
+            if (q.Count() != 4)
+            {
+                throw new Exception("Enumeration not working!");
+            }
+
+            // Message now spells the test name correctly (was "TesBam...").
+            Console.WriteLine("TestBamFileEnumeration - OK!");
+        }
+
+        // Checks sequence count, name lookup and subsequence extraction on
+        // the lambda reference FASTA.
+        public static void TestIndexedFasta()
+        {
+            var f = new IndexedFastaReader(FASTA_FILENAME);
+            bool check = (f.NumSequences() == 1 &&
+                          f.HasSequence("lambda_NEB3011") &&
+                          f.Subsequence("lambda_NEB3011:0-10") == "GGGCGGCGAC");
+            if (!check)
+            {
+                throw new Exception("Indexed FASTA files not working");
+            }
+
+            var b = new BamRecord();
+            var x = b.Impl();
+
+            Console.WriteLine("TestIndexedFasta - OK!");
+        }
+
+        // ZMWs 1, 2, 3 must match no records; ZMW 14743 must match 4.
+        public static void TestZmwQuery()
+        {
+            var d = new DataSet(BAM_FILENAME_2);
+            var q = new ZmwQuery(new IntList {1, 2, 3}, d);
+            var q2 = new ZmwQuery(new IntList { 14743 }, d);
+
+            if (0 != q.Count() || 4 != q2.Count())
+            {
+                throw new Exception("ZmwQuery not working");
+            }
+            Console.WriteLine("TestZmwQuery - OK!");
+        }
+
+        // The stitcher over the subreads/scraps pair must offer at least one
+        // record, and fetching it must not throw.
+        public static void TestStitching()
+        {
+            var stitcher = new ZmwReadStitcher(STITCHING_FN_1, STITCHING_FN_2);
+            if (!stitcher.HasNext())
+            {
+                throw new Exception("Error stitching via ZmwReadStitcher");
+            }
+            var zmwRecord = stitcher.Next();
+
+            Console.WriteLine("TestStitching - OK!");
+        }
+
+        // Runs every test in order; the first failure propagates as an exception.
+        public void RunAllTests()
+        {
+            TestExceptions();
+            TestCigar();
+            TestBamFileEnumeration();
+            TestIndexedFasta();
+            TestZmwQuery();
+            TestStitching();
+        }
+
+        public static void Main()
+        {
+            var t = new TestPbbam();
+            t.RunAllTests();
+        }
+
+    }
+}
--- /dev/null
+#!/bin/bash
+set -euo pipefail
+set -x
+
+
+# This is a temporary hack to build windows C# bindings, while I work on getting it
+# to work nicely through CMake --DHA
+#
+# NOTE(review): the ${Name} placeholders in this file look like values filled
+# in by the build system before the script runs; plain $NAME references are
+# ordinary shell variables -- confirm against the CMake configure step.
+
+# UNIX setup (before doing this!)
+# - install swig, cmake, and mono
+#
+# Windows setup (before doing this!):
+# - install msys2 (64-bit) from https://msys2.github.io/
+# - install: (pacman -S [package-name]); in all cases use the mingw-w64 64bit version when
+#   available:
+#     + gcc, g++, make
+#     + zlib-dev
+#     + swig
+#     + cmake
+# - for cmake, generate MSYS makefiles
+
+if [ "${WIN32}" ]; then
+    PLATFORM="Windows"
+else
+    PLATFORM="Unix"
+fi
+
+# Left unquoted where they are used so that a value consisting of a command
+# plus arguments keeps working.
+MSBUILD=${CSHARP_PROJECT_BUILDER}
+CSC=${CSHARP_COMPILER}
+
+CSPROJ_ROOT="${PacBioBAM_CSharpLibDir}"
+CSPROJ="${PacBioBAM_CSharpLibDir}/PacBio.BAM.csproj"
+ASSEMBLY_ROOT="${PacBioBAM_CSharpLibDir}/bin/Debug"
+
+# get expanded cmake generator expression ( $<TARGET:hts> or externally defined -DHTSLIB_LIBRARIES="")
+# from cmd line
+EXPANDED_HTSLIB_LIBRARIES=("$@")
+
+#
+# Make the managed DLL
+#
+# "cd && build" so that the build is not attempted from the wrong directory,
+# and a plain non-zero exit status instead of the out-of-range -1.
+( cd "$CSPROJ_ROOT" && $MSBUILD "$CSPROJ" ) || { echo "Failed to build managed DLL"; exit 1; }
+
+#
+# Copy the dependency libs
+#
+cp "${PacBioBAM_LibDir}/libpbbam${CMAKE_SHARED_LIBRARY_SUFFIX}" "$ASSEMBLY_ROOT"
+# Only the first command line argument is copied (unsubscripted array name).
+cp "$EXPANDED_HTSLIB_LIBRARIES" "$ASSEMBLY_ROOT" # Need "libhts*.dylib"
+
+if [ "$PLATFORM" == "Windows" ]
+then
+    # stuff we need to bundle on windows
+    cp /mingw64/bin/zlib1.dll "$ASSEMBLY_ROOT"
+    cp /mingw64/bin/libwinpthread-1.dll "$ASSEMBLY_ROOT"
+    cp "${PacBioBAM_CSharpLibDir}/libPacBioBam.dll" "$ASSEMBLY_ROOT/PacBioBam.dll"
+else
+    # For UNIX this is .so, even Mac.  Not sure why.
+    cp "${PacBioBAM_CSharpLibDir}/libPacBioBam.so" "$ASSEMBLY_ROOT"
+#    cp ${HTSLIB_LIBRARIES_VERSIONED_LINK} $ASSEMBLY_ROOT # Need "libhts*.dylib"
+fi
+
+# Bundle test data
+mkdir -p "$ASSEMBLY_ROOT/TestData"
+cp -rf "${PacBioBAM_TestsDir}/data/"* "$ASSEMBLY_ROOT/TestData"
+
+#
+# Make the "check" program, which we need to put next to the assembly to
+# allow it to be resolved.
+#
+
+CHECK_SRC="${CSharpTestRootDir}/CheckSWIG.cs"
+CHECK_BIN="$ASSEMBLY_ROOT/CheckSWIG.exe"
+
+
+if [ "$PLATFORM" == "Windows" ]
+then
+    ( $CSC "/lib:$ASSEMBLY_ROOT" //r:PacBio.BAM.dll "/out:$CHECK_BIN" "$(cygpath -w "$CHECK_SRC")" && "$CHECK_BIN" )
+else
+    ( $CSC "/lib:$ASSEMBLY_ROOT" /r:PacBio.BAM.dll "/out:$CHECK_BIN" "$CHECK_SRC" && cd "$ASSEMBLY_ROOT" && mono CheckSWIG.exe )
+fi
+
+
+#
+# Build the fuller test suite, and bundle
+#
+TEST_SRC="${CSharpTestRootDir}/TestPbbam.cs"
+TEST_BIN="$ASSEMBLY_ROOT/TestPbbam.exe"
+
+
+if [ "$PLATFORM" == "Windows" ]
+then
+    ( $CSC "/lib:$ASSEMBLY_ROOT" //r:PacBio.BAM.dll "/out:$TEST_BIN" "$(cygpath -w "$TEST_SRC")" && "$TEST_BIN" )
+else
+    ( $CSC "/lib:$ASSEMBLY_ROOT" /r:PacBio.BAM.dll "/out:$TEST_BIN" "$TEST_SRC" && cd "$ASSEMBLY_ROOT" && mono TestPbbam.exe )
+fi
--- /dev/null
+#!/bin/sh
+# Builds the managed wrapper project, compiles CheckSWIG.cs against it and
+# runs the result under mono.
+
+# Stop at the first failing step; otherwise a failed compile could go
+# unnoticed and a stale CheckSWIG.exe from an earlier run would be executed.
+set -e
+
+# "cd && build" so the build never runs from the wrong directory
+(cd "${PacBioBAM_CSharpLibDir}" && xbuild PacBio.BAM.csproj)
+
+${CSHARP_COMPILER} "-lib:${PacBioBAM_CSharpLibDir}/bin/Debug" -r:PacBio.BAM.dll CheckSWIG.cs
+
+#
+# For deployment these all need to be installed somewhere more sensible.
+# This is just a hack for testing if the build works.
+#
+LIBRARY_PATHS=\
+${PacBioBAM_CSharpLibDir}:\
+${PacBioBAM_LibDir}:\
+${Htslib_LibDir}
+
+# Both the macOS and the Linux loader variables are set, so the same command
+# line serves either platform.
+DYLD_LIBRARY_PATH=$LIBRARY_PATHS \
+LD_LIBRARY_PATH=$LIBRARY_PATHS \
+MONO_PATH=${PacBioBAM_CSharpLibDir}/bin/Debug \
+mono CheckSWIG.exe
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+# load PacBioBAM lib & wrapper script
+
+# htslib_libname <- "@Htslib_Libraries@"
+# dyn.load(htslib_libname)
+# # htslib_libname <- paste(htslib_libpath, "libhts", sep="/")
+# # dyn.load(paste(htslib_libname, ".dylib", sep=""))
+
+# Location of the generated R wrapper; the placeholder between the @ signs is
+# expected to be replaced with a real directory before this script runs.
+pbbam_libpath <- "@PacBioBAM_RLibDir@"
+
+# Load the shared library, using this platform's dynamic-library extension.
+pbbam_libname <- paste0(pbbam_libpath, "/", "PacBioBam")
+dyn.load(paste0(pbbam_libname, .Platform$dynlib.ext))
+
+# Load the R side of the wrapper.
+pbbam_wrapper <- paste0(pbbam_libpath, "/", "PacBioBam.R")
+source(pbbam_wrapper)
+
+cacheMetaData(1)
+
+# Constructing a wrapped object is the actual check.
+h <- BamHeader()
+
+message <- "\nR Wrapper OK.\n"
+cat(message)
+cat("\n")
--- /dev/null
+
+# usage: R [args] < test_pbbam.R --args <tests_scripts_dir> <PacBioBam_lib_dir> <test_data_dir>
+#
+# The three trailing arguments are stored in global variables; the variable
+# names are kept as they are because the scripts sourced below may read them.
+args <- commandArgs(trailingOnly = TRUE)
+tests_path <- args[1]
+lib_path <- args[2]
+test_data_path <- args[3]
+
+# load PacBioBAM lib & wrapper script
+pbbam_libname <- paste0(lib_path, "/", "PacBioBam")
+pbbam_wrapper <- paste0(lib_path, "/", "PacBioBam.R")
+dyn.load(paste0(pbbam_libname, .Platform$dynlib.ext))
+source(pbbam_wrapper)
+cacheMetaData(1)
+
+# init test utils & run test cases
+source(paste0(tests_path, "/", "utils.R"))
+run_test_suite(tests_path)
+
+# print results & exit; a non-zero status signals that a test failed
+results <- test_suite_results()
+results$print_summary()
+if (results$any_failed()) {
+    quit(status = 1)
+}
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+#! /usr/bin/sh
+
+GENERATED_BAM=@PacBioBAM_TestsDir@/data/generated.bam
+
+touch $GENERATED_BAM
+chmod 644 $GENERATED_BAM
+
+R --slave --no-save < @RTestRootDir@/test_pbbam.R --args \
+ @RTestRootDir@/tests \
+ @PacBioBAM_RLibDir@ \
+ @PacBioBAM_TestsDir@/data
+
+STATUS=$?
+
+rm $GENERATED_BAM
+
+exit $STATUS
\ No newline at end of file
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+# Accuracy values are expected to be clamped on construction: inputs below 0.0
+# read back as 0.0, inputs above 1.0 read back as 1.0, in-range inputs are kept.
+test_case("Accuracy_Clamp", {
+
+    eps <- 1e-5
+
+    # TRUE when the wrapped accuracy reads back within `eps` of `expected`
+    near <- function(expected, accuracy) {
+        abs(expected - accuracy$ToFloat()) <= eps
+    }
+
+    acc_zero     <- Accuracy(0.0)
+    acc_negative <- Accuracy(-0.5)
+    acc_lowest   <- Accuracy(0.0)
+    acc_typical  <- Accuracy(0.9)
+    acc_highest  <- Accuracy(1.0)
+    acc_overflow <- Accuracy(1.1)
+
+    assertTrue( near(0.0, acc_zero) )
+    assertTrue( near(0.0, acc_negative) )
+    assertTrue( near(0.0, acc_lowest) )
+    assertTrue( near(0.9, acc_typical) )
+    assertTrue( near(1.0, acc_highest) )
+    assertTrue( near(1.0, acc_overflow) )
+
+    # The exact-equality variants of these checks are disabled; the
+    # tolerance-based comparisons above are used instead:
+    # assertEqual(0.0, a_zero$ToFloat())
+    # assertEqual(0.0, a_neg$ToFloat())
+    # assertEqual(0.0, a_min$ToFloat())
+    # assertEqual(0.9, a_normal$ToFloat())
+    # assertEqual(1.0, a_max$ToFloat())
+    # assertEqual(1.0, a_tooLarge$ToFloat())
+})
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+# Opening a BAM file that does not exist is expected to fail.
+test_case("BamFile_NonExistentFile", {
+
+    # Only record *whether* the constructor signalled a warning or an error
+    # inside tryCatch(). The assertion itself is made afterwards, so a failing
+    # assertion can never be intercepted by the handlers installed for the
+    # constructor call and turned into a pass.
+    ctor_failed <- tryCatch(
+        {
+            BamFile("does_not_exist.bam")
+            FALSE   # constructor returned normally
+        },
+        warning = function(w) TRUE,
+        error   = function(e) TRUE
+    )
+
+    assertTrue(ctor_failed)   # should have thrown
+    return(NULL)
+})
+
+# Opening an existing BAM file must not raise a warning or an error.
+test_case("BamFile_Ctor", {
+
+    bam_path <- paste(test_data_path, "aligned.bam", sep="/")
+
+    # shared handler: any condition raised by the constructor fails the test
+    flag_failure <- function(cond) {
+        assertTrue(FALSE)   # should not throw
+        invisible()
+    }
+
+    result <- tryCatch(
+        {
+            f <- BamFile(bam_path)
+            invisible()
+        },
+        warning = flag_failure,
+        error   = flag_failure
+    )
+    return(result)
+})
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+# Looks up the program ID "foo" on `header` and expects the lookup to raise a
+# warning. A lookup that returns normally is recorded as a failed assertion.
+# Returns NULL in either case.
+empty_program <- function(header) {
+
+    # expected path: the lookup raised a warning
+    on_warning <- function(w) {
+        assertTrue(TRUE)
+        invisible()
+    }
+
+    outcome <- tryCatch(
+        {
+            header$Program("foo")
+            assertTrue(FALSE)   # should have thrown
+            invisible()
+        },
+        warning = on_warning
+    )
+    outcome
+}
+
+# Looks up the read group ID "foo" on `header` and expects the lookup to raise
+# a warning. A lookup that returns normally is recorded as a failed assertion.
+# Returns NULL in either case.
+empty_readgroup <- function(header) {
+
+    # expected path: the lookup raised a warning
+    on_warning <- function(w) {
+        assertTrue(TRUE)
+        invisible()
+    }
+
+    outcome <- tryCatch(
+        {
+            header$ReadGroup("foo")
+            assertTrue(FALSE)   # should have thrown
+            invisible()
+        },
+        warning = on_warning
+    )
+    outcome
+}
+
+# Asks `header` for the ID of the sequence named "foo" and expects the lookup
+# to raise a warning. A lookup that returns normally is recorded as a failed
+# assertion. Returns NULL in either case.
+empty_sequenceid <- function(header) {
+
+    # expected path: the lookup raised a warning
+    on_warning <- function(w) {
+        assertTrue(TRUE)
+        invisible()
+    }
+
+    outcome <- tryCatch(
+        {
+            header$SequenceId("foo")
+            assertTrue(FALSE)   # should have thrown
+            invisible()
+        },
+        warning = on_warning
+    )
+    outcome
+}
+
+# A default-constructed header is expected to be completely empty.
+test_case("BamHeader_Defaults", {
+
+    hdr <- BamHeader()
+
+    # no version / sort-order text
+    assertEqual(0L, nchar(hdr$Version()))
+    assertEqual(0L, nchar(hdr$SortOrder()))
+
+    # no read groups, sequences or programs
+    assertTrue(hdr$ReadGroups()$empty())
+    assertTrue(hdr$Sequences()$empty())
+    assertTrue(hdr$Programs()$empty())
+
+    # lookups of unknown entries are checked inside the helpers
+    empty_program(hdr)
+    empty_readgroup(hdr)
+    empty_sequenceid(hdr)
+
+    # TODO: get comment fetching working
+    #assertEqual(1, length(header$Comments()))
+})
+
+# Parses SAM header text and checks every field that is read back.
+test_case("BamHeader_Decode", {
+
+    sam_lines <- c(
+        "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1",
+        "@SQ\tSN:chr1\tLN:2038\tSP:chocobo",
+        "@SQ\tSN:chr2\tLN:3042\tSP:chocobo",
+        "@RG\tID:rg1\tSM:control",
+        "@RG\tID:rg2\tSM:condition1",
+        "@RG\tID:rg3\tSM:condition1",
+        "@PG\tID:_foo_\tPN:ide",
+        "@CO\tipsum and so on",
+        "@CO\tcitation needed"
+    )
+    hdr <- BamHeader(paste(sam_lines, collapse="\n"))
+
+    # @HD line
+    assertEqual("1.1", hdr$Version())
+    assertEqual("queryname", hdr$SortOrder())
+    assertEqual("3.0.1", hdr$PacBioBamVersion())
+
+    # @RG lines: read group ID -> expected sample
+    rg_samples <- c(rg1="control", rg2="condition1", rg3="condition1")
+    assertEqual(3L, hdr$ReadGroups()$size())
+    for (rg_id in names(rg_samples)) {
+        assertTrue(hdr$HasReadGroup(rg_id))
+    }
+    for (rg_id in names(rg_samples)) {
+        assertEqual(rg_samples[[rg_id]], hdr$ReadGroup(rg_id)$Sample())
+    }
+
+    # @SQ lines: sequence name -> expected length (read back as text)
+    seq_lengths <- c(chr1="2038", chr2="3042")
+    assertEqual(2L, hdr$Sequences()$size())
+    for (seq_name in names(seq_lengths)) {
+        assertTrue(hdr$HasSequence(seq_name))
+    }
+    for (seq_name in names(seq_lengths)) {
+        assertEqual("chocobo", hdr$Sequence(seq_name)$Species())
+    }
+    for (seq_name in names(seq_lengths)) {
+        assertEqual(seq_lengths[[seq_name]], hdr$Sequence(seq_name)$Length())
+    }
+
+    # @PG line
+    assertEqual(1L, hdr$Programs()$size())
+    assertTrue(hdr$HasProgram("_foo_"))
+    assertEqual("ide", hdr$Program("_foo_")$Name())
+
+    # TODO: get comment fetching working
+    # assertEqual(2, header$Comments()$size())
+    # assertEqual("ipsum and so on", header$Comments()[1])
+    # assertEqual("citation needed", header$Comments()[2])
+})
+
+# Builds a header programmatically and compares its SAM text with the
+# expected rendering (note the trailing newline after the last line).
+test_case("BamHeader_Encode", {
+
+    expected_sam <- paste(c(
+        "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1",
+        "@SQ\tSN:chr1\tLN:2038\tSP:chocobo",
+        "@SQ\tSN:chr2\tLN:3042\tSP:chocobo",
+        "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL",
+        "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL",
+        "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL",
+        "@PG\tID:_foo_\tPN:ide",
+        "@CO\tipsum and so on",
+        "@CO\tcitation needed",
+        ""
+    ), collapse="\n")
+
+    # read group with the given ID and sample
+    make_read_group <- function(id, sample) {
+        rg <- ReadGroupInfo(id)
+        rg$Sample(sample)
+        rg
+    }
+
+    # sequence entry with the given name and (textual) length
+    make_sequence <- function(name, len) {
+        sq <- SequenceInfo(name)
+        sq$Length(len)
+        sq$Species("chocobo")
+        sq
+    }
+
+    read_groups <- list(
+        make_read_group("rg1", "control"),
+        make_read_group("rg2", "condition1"),
+        make_read_group("rg3", "condition1")
+    )
+    sequences <- list(
+        make_sequence("chr1", "2038"),
+        make_sequence("chr2", "3042")
+    )
+
+    program <- ProgramInfo("_foo_")
+    program$Name("ide")
+
+    hdr <- BamHeader()
+    hdr$Version("1.1")
+    hdr$SortOrder("queryname")
+    hdr$PacBioBamVersion("3.0.1")
+    for (rg in read_groups) {
+        hdr$AddReadGroup(rg)
+    }
+    for (sq in sequences) {
+        hdr$AddSequence(sq)
+    }
+    hdr$AddProgram(program)
+    hdr$AddComment("ipsum and so on")
+    hdr$AddComment("citation needed")
+
+    assertEqual(expected_sam, hdr$ToSam())
+})
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+# Every CIGAR operation type name must map to its SAM character.
+test_case("Cigar_TypeToChar", {
+
+    # type names, paired index-by-index with their expected characters
+    op_types <- c('ALIGNMENT_MATCH', 'INSERTION', 'DELETION',
+                  'REFERENCE_SKIP', 'SOFT_CLIP', 'HARD_CLIP',
+                  'PADDING', 'SEQUENCE_MATCH', 'SEQUENCE_MISMATCH')
+    op_chars <- c('M', 'I', 'D', 'N', 'S', 'H', 'P', '=', 'X')
+
+    for (k in seq_along(op_types)) {
+        assertEqual(op_chars[[k]], CigarOperation_TypeToChar(op_types[[k]]))
+    }
+})
+
+# Every SAM CIGAR character must map back to its operation type name.
+test_case("Cigar_CharToType", {
+
+    # characters, paired index-by-index with their expected type names
+    op_chars <- c('M', 'I', 'D', 'N', 'S', 'H', 'P', '=', 'X')
+    op_types <- c('ALIGNMENT_MATCH', 'INSERTION', 'DELETION',
+                  'REFERENCE_SKIP', 'SOFT_CLIP', 'HARD_CLIP',
+                  'PADDING', 'SEQUENCE_MATCH', 'SEQUENCE_MISMATCH')
+
+    for (k in seq_along(op_chars)) {
+        assertEqual(op_types[[k]], CigarOperation_CharToType(op_chars[[k]]))
+    }
+})
+
+# Setting an operation's type must be reflected by its character accessor.
+test_case("Cigar_SetType", {
+
+    op_types <- c('ALIGNMENT_MATCH', 'INSERTION', 'DELETION',
+                  'REFERENCE_SKIP', 'SOFT_CLIP', 'HARD_CLIP',
+                  'PADDING', 'SEQUENCE_MATCH', 'SEQUENCE_MISMATCH')
+    op_chars <- c('M', 'I', 'D', 'N', 'S', 'H', 'P', '=', 'X')
+
+    # one default-constructed operation per type
+    ops <- lapply(op_types, function(unused) CigarOperation())
+
+    # assign every type first ...
+    for (k in seq_along(ops)) {
+        ops[[k]]$Type(op_types[[k]])
+    }
+
+    # ... then check the character each operation reports
+    for (k in seq_along(ops)) {
+        assertEqual(op_chars[[k]], ops[[k]]$Char())
+    }
+})
+
+# Setting an operation's character must be reflected by its type accessor.
+test_case("Cigar_SetChar", {
+
+    op_chars <- c('M', 'I', 'D', 'N', 'S', 'H', 'P', '=', 'X')
+    op_types <- c('ALIGNMENT_MATCH', 'INSERTION', 'DELETION',
+                  'REFERENCE_SKIP', 'SOFT_CLIP', 'HARD_CLIP',
+                  'PADDING', 'SEQUENCE_MATCH', 'SEQUENCE_MISMATCH')
+
+    # one default-constructed operation per character
+    ops <- lapply(op_chars, function(unused) CigarOperation())
+
+    # assign every character first ...
+    for (k in seq_along(ops)) {
+        ops[[k]]$Char(op_chars[[k]])
+    }
+
+    # ... then check the type each operation reports
+    for (k in seq_along(ops)) {
+        assertEqual(op_types[[k]], ops[[k]]$Type())
+    }
+})
+
+# An operation built from a literal character and one built from a converted
+# type name must agree on character, type and length.
+test_case("Cigar_CigarOpCtors", {
+
+    from_char <- CigarOperation("S", 10)
+    from_type <- CigarOperation(CigarOperation_TypeToChar('SOFT_CLIP'), 10)
+    both <- list(from_char, from_type)
+
+    for (op in both) {
+        assertEqual('S', op$Char())
+    }
+    for (op in both) {
+        assertEqual('SOFT_CLIP', op$Type())
+    }
+    for (op in both) {
+        assertEqual(10L, op$Length())
+    }
+})
+
+# Parsing an empty string must yield a CIGAR with no operations.
+test_case("Cigar_FromEmptyString", {
+
+    parsed <- Cigar("")
+    assertEqual(0L, parsed$size())
+})
+
+# Parses a one-operation and a six-operation CIGAR string and checks the
+# character, type and length of every resulting operation.
+test_case("Cigar_FromString", {
+
+    single <- Cigar("100=")
+    multi  <- Cigar("100=2D34I6=6X6=")
+
+    # single-operation CIGAR
+    assertEqual(1L, single$size())
+
+    only_op <- single$front()
+    assertEqual('=', only_op$Char())
+    assertEqual('SEQUENCE_MATCH', only_op$Type())
+    assertEqual(100L, only_op$Length())
+
+    # multi-operation CIGAR
+    assertEqual(6L, multi$size())
+
+    # haven't quite figured out [ ] accessors via SWIG,
+    # but this method does work w/ !ZERO!-based indices
+    ops <- lapply(c(0, 1, 2, 3, 4, 5),
+                  function(idx) multi$'__getitem__'(idx))
+
+    expected_chars   <- c('=', 'D', 'I', '=', 'X', '=')
+    expected_types   <- c('SEQUENCE_MATCH', 'DELETION', 'INSERTION',
+                          'SEQUENCE_MATCH', 'SEQUENCE_MISMATCH', 'SEQUENCE_MATCH')
+    expected_lengths <- c(100L, 2L, 34L, 6L, 6L, 6L)
+
+    for (k in seq_along(ops)) {
+        assertEqual(expected_chars[[k]], ops[[k]]$Char())
+    }
+    for (k in seq_along(ops)) {
+        assertEqual(expected_types[[k]], ops[[k]]$Type())
+    }
+    for (k in seq_along(ops)) {
+        assertEqual(expected_lengths[[k]], ops[[k]]$Length())
+    }
+})
+
+# A default-constructed CIGAR must render as a zero-character string.
+test_case("Cigar_ToEmptyString", {
+
+    blank <- Cigar()
+    rendered <- blank$ToStdString()
+
+    # compare the character count: nchar() of an empty string is 0
+    assertEqual(0L, nchar(rendered))
+})
+
+# CIGARs assembled operation by operation must render to the expected text.
+test_case("Cigar_ToString", {
+
+    # one operation, with its character derived from the type name
+    single <- Cigar()
+    single$push_back( CigarOperation(CigarOperation_TypeToChar('SEQUENCE_MATCH'), 100) )
+
+    # six operations, appended in order
+    multi <- Cigar()
+    op_chars   <- c('=', 'D', 'I', '=', 'X', '=')
+    op_lengths <- c(100, 2, 34, 6, 6, 6)
+    for (k in seq_along(op_chars)) {
+        multi$push_back(CigarOperation(op_chars[[k]], op_lengths[[k]]))
+    }
+
+    assertEqual("100=", single$ToStdString())
+    assertEqual("100=2D34I6=6X6=", multi$ToStdString())
+})
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+# Copies every record of the BAM file `inputFn` into a new BAM file
+# `generatedFn` (written with the input file's header) and returns the list of
+# record names in the order they were read. If an error is raised, a failed
+# assertion is recorded and an empty list is returned.
+originalNames <- function(inputFn, generatedFn) {
+
+    # The message is compared with a placeholder that is not expected to match,
+    # presumably so the real message shows up in the failure output.
+    on_error <- function(e) {
+        assertEqual("why:", e$message)   # should not throw
+        list()
+    }
+
+    tryCatch(
+        {
+            source_bam <- BamFile(inputFn)
+            writer <- BamWriter(generatedFn, source_bam$Header())
+
+            ds <- DataSet(inputFn)
+            query <- EntireFileQuery(ds)
+
+            names_seen <- list()
+            cursor <- query$begin()
+            stop_at <- query$end()
+            while ( cursor$'__ne__'(stop_at) ) {
+                rec <- cursor$value()
+                names_seen <- c(names_seen, rec$FullName())
+                writer$Write(rec)
+                cursor$incr()
+            }
+
+            # explicitly destroy the writer once all records are written
+            writer$'delete_BamWriter'()
+            names_seen
+        },
+        error = on_error
+    )
+}
+
+# Reads every record of the BAM file `generatedFn` and returns the list of
+# record names in file order. If an error is raised, a failed assertion is
+# recorded and an empty list is returned.
+generatedNames <- function(generatedFn) {
+
+    result <- tryCatch(
+        {
+            ds <- DataSet(generatedFn)
+            entireFile <- EntireFileQuery(ds)
+
+            names_out <- list()
+            iter <- entireFile$begin()
+            end <- entireFile$end()
+            while ( iter$'__ne__'(end) ) {
+                # Fetch the record through value(), exactly as originalNames()
+                # does, rather than calling FullName() on the iterator itself.
+                record <- iter$value()
+                names_out <- c(names_out, record$FullName())
+                iter$incr()
+            }
+            return(names_out)
+        },
+        error = function(e) {
+            # The message is compared with a placeholder that is not expected
+            # to match, presumably so the real message shows up in the output.
+            assertEqual("why:", e$message) # should not throw
+            return(list())
+        }
+    )
+    return(result)
+}
+
+#test_case("EndToEnd_CopyFileAndReadBack", {
+#
+# inputFn <- paste(test_data_path, "aligned.bam", sep="/")
+# generatedFn <- paste(test_data_path, "generated.bam", sep="/")
+#
+# # loop over original file, store names, write to generated file
+# names_in <- originalNames(inputFn, generatedFn)
+#
+# # read names from new file
+# names_out <- generatedNames(generatedFn)
+#
+# # ensure equal
+# assertEqual(names_in, names_out)
+#})
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+# Input frame values for the Frames tests, kept as a list of numeric scalars.
+testFrames_rawData <- as.list(c(
+    0, 8, 140, 0, 0, 7, 4, 0, 85, 2,
+    1, 3, 2, 10, 1, 20, 47, 10, 9, 60,
+    20, 3, 12, 5, 13, 165, 6, 14, 22, 12,
+    2, 4, 9, 218, 27, 3, 15, 2, 17, 2,
+    45, 24, 89, 10, 7, 1, 11, 15, 0, 7,
+    0, 28, 17, 12, 6, 10, 37, 0, 12, 52,
+    0, 7, 1, 14, 3, 26, 12, 0, 20, 17,
+    2, 13, 2, 9, 13, 7, 15, 29, 3, 6,
+    2, 1, 28, 10, 3, 14, 7, 1, 22, 1,
+    6, 6, 0, 19, 31, 6, 2, 14, 0, 0,
+    1000, 947, 948
+))
+
+# Expected values for the Frames_Downsample test, kept as a list of numeric
+# scalars; it has the same number of entries as testFrames_rawData.
+encodedFrames_rawData <- as.list(c(
+    0, 8, 102, 0, 0, 7, 4, 0, 75, 2, 1, 3, 2,
+    10, 1, 20, 47, 10, 9, 60, 20, 3, 12, 5, 13, 115,
+    6, 14, 22, 12, 2, 4, 9, 135, 27, 3, 15, 2, 17,
+    2, 45, 24, 77, 10, 7, 1, 11, 15, 0, 7, 0, 28,
+    17, 12, 6, 10, 37, 0, 12, 52, 0, 7, 1, 14, 3,
+    26, 12, 0, 20, 17, 2, 13, 2, 9, 13, 7, 15, 29,
+    3, 6, 2, 1, 28, 10, 3, 14, 7, 1, 22, 1, 6,
+    6, 0, 19, 31, 6, 2, 14, 0, 0,
+    255, 254, 255
+))
+
+# Copy the raw values, element by element, into UInt16List containers so they
+# can be passed to the Frames constructor.
+testFrames <- UInt16List()
+for (frame in testFrames_rawData) {
+    testFrames$push_back(frame)
+}
+
+encodedFrames <- UInt16List()
+for (code in encodedFrames_rawData) {
+    encodedFrames$push_back(code)
+}
+
+# A default Frames object holds no data; a Frames object built from a list is
+# expected to hand the same values back.
+test_case("Frames_Basic", {
+
+    f <- Frames()
+    assertEqual(0L, f$Data()$size())
+
+    f2 <- Frames(testFrames)
+    d <- f2$Data()
+    assertEqual(length(testFrames), length(d))
+
+    # seq_len() gives an empty loop when the length is 0, whereas 1:0 would
+    # iterate over c(1, 0) and index out of range.
+    numFrames <- length(testFrames)
+    for (i in seq_len(numFrames)) {
+        assertEqual(testFrames[i], d[i])
+    }
+})
+
+# Compares the data read back from a Frames object with `encodedFrames`.
+# NOTE(review): the Frames object is built from `testFrames` and only Data()
+# is read back - no encode/downsample call is made here - yet the result is
+# compared with `encodedFrames`. Confirm this is the intended check.
+test_case("Frames_Downsample", {
+
+    f <- Frames(testFrames)
+    d <- f$Data()
+    assertEqual(length(encodedFrames), length(d))
+
+    # seq_len() gives an empty loop when the length is 0, whereas 1:0 would
+    # iterate over c(1, 0) and index out of range.
+    numFrames <- length(encodedFrames)
+    for (i in seq_len(numFrames)) {
+        assertEqual(encodedFrames[i], d[i])
+    }
+})
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+# The "unmapped" position sentinel is expected to be -1.
+test_case("Intervals_UnmappedPosition", {
+    sentinel <- UnmappedPosition()
+    assertEqual(-1L, sentinel)
+})
+
+# Checks the start/stop values produced by each PositionInterval constructor.
+test_case("Intervals_Ctors", {
+
+    blank <- PositionInterval()       # no arguments
+    point <- PositionInterval(4)      # start only
+    span  <- PositionInterval(5, 8)   # start and stop
+
+    # default interval is [0, 0)
+    assertEqual(0L, blank$Start())
+    assertEqual(0L, blank$Stop())
+
+    # a start-only interval stops one past its start
+    assertEqual(4L, point$Start())
+    assertEqual(5L, point$Stop())
+
+    # explicit bounds are kept as given
+    assertEqual(5L, span$Start())
+    assertEqual(8L, span$Stop())
+})
+
+# Equality checks between identical, equivalent and different intervals.
+test_case("Intervals_Equality", {
+
+    blank      <- PositionInterval()
+    blank_twin <- PositionInterval()
+    point      <- PositionInterval(4)
+    point_twin <- PositionInterval(4, 5)
+    span       <- PositionInterval(5, 8)
+    span_twin  <- PositionInterval(5, 8)
+    far_away   <- PositionInterval(20, 40)
+
+    # self-equality
+    for (iv in list(blank, point, span, far_away)) {
+        assertEqual(iv, iv)
+    }
+
+    # same values
+    # TODO: fix this to work with == or *anything* cleaner
+    assertTrue(blank$'__eq__'(blank_twin))
+    assertTrue(point$'__eq__'(point_twin))
+    assertTrue(span$'__eq__'(span_twin))
+
+    # different values
+    for (other in list(point, span, far_away)) {
+        assertNotEqual(blank, other)
+    }
+    assertNotEqual(point, span)
+    assertNotEqual(span, far_away)
+})
+
+# An interval must compare equal to itself, to a copy-constructed interval
+# and to a plain R assignment of it.
+test_case("Intervals_Copy", {
+
+    source_iv  <- PositionInterval(5,8)
+    via_ctor   <- PositionInterval(source_iv)
+    via_assign <- source_iv
+
+    # TODO: fix this to work with == or *anything* cleaner
+    for (candidate in list(source_iv, via_ctor, via_assign)) {
+        assertTrue(source_iv$'__eq__'(candidate))
+    }
+})
+
+# Changing the bounds of a copied interval must make it differ from its source.
+test_case("Intervals_Modifiers", {
+
+    source_iv <- PositionInterval(5,8)
+
+    # copy, then move both bounds
+    edited <- PositionInterval(source_iv)
+    edited$Start(2)
+    edited$Stop(10)
+
+    assertNotEqual(source_iv, edited)
+    assertEqual(2L, edited$Start())
+    assertEqual(10L, edited$Stop())
+})
+
+# Covers / CoveredBy relations between position intervals.
+test_case("Intervals_Cover", {
+
+    ivA <- PositionInterval(2,4)
+    ivB <- PositionInterval(3,5)
+    ivC <- PositionInterval(6,8)
+    ivD <- PositionInterval(1,7)
+    ivE <- PositionInterval(5,8)
+
+    #      0123456789
+    #   A    --
+    #   B     --
+    #   C        --
+    #   D   ------
+    #   E       ---
+
+    # self-cover
+    assertTrue(ivA$Covers(ivA))
+    assertTrue(ivA$CoveredBy(ivA))
+
+    # basic covers/covered
+    assertTrue(ivB$CoveredBy(ivD))
+    assertTrue(ivD$Covers(ivB))
+    assertNotEqual(ivB, ivD)
+    assertFalse(ivB$Covers(ivD))
+
+    # completely disjoint
+    assertFalse(ivB$Covers(ivC))
+    assertFalse(ivC$Covers(ivB))
+    assertFalse(ivB$CoveredBy(ivC))
+    assertFalse(ivC$CoveredBy(ivB))
+
+    # B.stop == E.start
+    assertFalse(ivB$Covers(ivE))
+    assertFalse(ivB$CoveredBy(ivE))
+
+    # shared endpoint, start contained
+    assertTrue(ivE$Covers(ivC))
+    assertTrue(ivC$CoveredBy(ivE))
+})
+
+# Intersection relations between position intervals.
+test_case("Intervals_Intersect", {
+
+    ivA <- PositionInterval(2,4)
+    ivB <- PositionInterval(3,5)
+    ivC <- PositionInterval(6,8)
+    ivD <- PositionInterval(1,7)
+    ivE <- PositionInterval(5,8)
+
+    #      0123456789
+    #   A    --
+    #   B     --
+    #   C        --
+    #   D   ------
+    #   E       ---
+
+    # self-intersection
+    assertTrue(ivA$Intersects(ivA))
+
+    # intersection is commutative
+    assertTrue(ivA$Intersects(ivB))
+    assertTrue(ivB$Intersects(ivA))
+
+    # covered implies intersection
+    assertTrue(ivD$Covers(ivA))
+    assertTrue(ivA$Intersects(ivD))
+    assertTrue(ivD$Intersects(ivA))
+
+    # C.start > B.stop (obvious disjoint)
+    assertFalse(ivB$Intersects(ivC))
+
+    # B.stop == E.start (intervals are right-open, so disjoint)
+    assertFalse(ivB$Intersects(ivE))
+})
+
+# An interval is expected to be valid only when start < stop.
+test_case("Intervals_Validity", {
+
+    cases <- list(
+        PositionInterval(),      # default ctor
+        PositionInterval(0,0),   # start == stop (zero)
+        PositionInterval(4,4),   # start == stop (nonzero)
+        PositionInterval(0,1),   # start < stop  (start is zero)
+        PositionInterval(4,5),   # start < stop  (start is nonzero)
+        PositionInterval(5,4)    # start > stop
+    )
+
+    # expected IsValid() result, index-by-index with `cases`
+    expect_valid <- c(FALSE, FALSE, FALSE, TRUE, TRUE, FALSE)
+
+    for (k in seq_along(cases)) {
+        if (expect_valid[[k]]) {
+            assertTrue(cases[[k]]$IsValid())
+        } else {
+            assertFalse(cases[[k]]$IsValid())
+        }
+    }
+})
+
+# Interval length is expected to be stop - start.
+test_case("Intervals_Length",{
+
+    # (start, stop) pairs, index-by-index with their expected lengths
+    bounds <- list(c(2,4), c(3,5), c(6,8), c(1,7), c(5,8))
+    expected_lengths <- c(2L, 2L, 2L, 6L, 3L)
+
+    intervals <- lapply(bounds,
+                        function(b) PositionInterval(b[[1]], b[[2]]))
+
+    for (k in seq_along(intervals)) {
+        assertEqual(expected_lengths[[k]], intervals[[k]]$Length())
+    }
+})
+
+# Checks name/start/stop for default and fully-specified genomic intervals.
+test_case("GenomicIntervals_Ctors", {
+
+    blank  <- GenomicInterval()
+    region <- GenomicInterval("seq1", 100, 200)
+
+    # default: empty name, [0, 0)
+    assertEqual("", blank$Name())
+    assertEqual(0L, blank$Start())
+    assertEqual(0L, blank$Stop())
+
+    # explicit arguments are kept as given
+    assertEqual("seq1", region$Name())
+    assertEqual(100L, region$Start())
+    assertEqual(200L, region$Stop())
+})
+
+# A genomic interval must compare equal to itself, to a copy-constructed
+# interval and to a plain R assignment of it.
+test_case("GenomicIntervals_Copy", {
+
+    source_iv  <- GenomicInterval("seq1", 10, 20)
+    via_ctor   <- GenomicInterval(source_iv)
+    via_assign <- source_iv
+
+    # TODO: fix this to work with == or *anything* cleaner
+    for (candidate in list(source_iv, via_ctor, via_assign)) {
+        assertTrue(source_iv$'__eq__'(candidate))
+    }
+})
+
+# Setters on copies must change the copy and leave untouched fields alone.
+test_case("GenomicIntervals_Modifiers", {
+
+    source_iv <- GenomicInterval("seq1", 10, 20)
+
+    # copy with every field changed
+    renamed <- GenomicInterval(source_iv)
+    renamed$Name("seq5")
+    renamed$Start(2)
+    renamed$Stop(10)
+
+    # copy that only takes over the other copy's position interval
+    regrafted <- GenomicInterval(source_iv)
+    regrafted$Interval(renamed$Interval())
+
+    assertNotEqual(source_iv, renamed)
+
+    assertEqual("seq5", renamed$Name())
+    assertEqual(2L, renamed$Start())
+    assertEqual(10L, renamed$Stop())
+
+    # the name is unchanged by Interval()
+    assertEqual(source_iv$Name(), regrafted$Name())
+
+    # TODO: fix this to work with == or *anything* cleaner
+    assertTrue(renamed$Interval()$'__eq__'(regrafted$Interval()))
+})
+
+# Covers / CoveredBy relations between genomic intervals, including the case
+# of identical coordinates on a different reference.
+test_case("GenomicIntervals_Cover", {
+
+    ivA <- GenomicInterval("seq1",2,4)
+    ivB <- GenomicInterval("seq1",3,5)
+    ivC <- GenomicInterval("seq1",6,8)
+    ivD <- GenomicInterval("seq1",1,7)
+    ivE <- GenomicInterval("seq1",5,8)
+    ivF <- GenomicInterval("seq2",3,5)   # same as B, different ref
+
+    #      0123456789
+    #   A    --
+    #   B     --
+    #   C        --
+    #   D   ------
+    #   E       ---
+
+    # self-cover
+    assertTrue(ivA$Covers(ivA))
+    assertTrue(ivA$CoveredBy(ivA))
+
+    # basic covers/covered
+    assertTrue(ivB$CoveredBy(ivD))
+    assertTrue(ivD$Covers(ivB))
+    assertNotEqual(ivB, ivD)
+    assertFalse(ivB$Covers(ivD))
+
+    # same coords as B, but different ref
+    assertFalse(ivF$CoveredBy(ivD))
+    assertFalse(ivD$Covers(ivF))
+    assertNotEqual(ivF, ivD)
+    assertFalse(ivF$Covers(ivD))
+
+    # obvious disjoint
+    assertFalse(ivB$Covers(ivC))
+    assertFalse(ivC$Covers(ivB))
+    assertFalse(ivB$CoveredBy(ivC))
+    assertFalse(ivC$CoveredBy(ivB))
+
+    # B.stop == E.start (intervals are right-open, so disjoint)
+    assertFalse(ivB$Covers(ivE))
+    assertFalse(ivB$CoveredBy(ivE))
+
+    # shared endpoint, start contained
+    assertTrue(ivE$Covers(ivC))
+    assertTrue(ivC$CoveredBy(ivE))
+
+    # assertTrue(FALSE)
+})
+
+test_case("GenomicIntervals_Validity", {
+
+    # Expected rule, as encoded by the assertions below: an interval is valid
+    # only when it has a non-empty name AND start < stop.
+
+    # non-empty name, start < stop
+    assertTrue(GenomicInterval("seq",0,1)$IsValid())    # start == zero
+    assertTrue(GenomicInterval("seq1",4,5)$IsValid())   # start > zero
+
+    # default-constructed
+    assertFalse(GenomicInterval()$IsValid())
+
+    # non-empty name, but start >= stop
+    assertFalse(GenomicInterval("seq1",0,0)$IsValid())  # start == stop (zero)
+    assertFalse(GenomicInterval("seq1",4,4)$IsValid())  # start == stop (non-zero)
+    assertFalse(GenomicInterval("seq1",5,4)$IsValid())  # start > stop
+
+    # empty name, regardless of coordinates
+    assertFalse(GenomicInterval("",0,0)$IsValid())      # start == stop (zero)
+    assertFalse(GenomicInterval("",4,4)$IsValid())      # start == stop (non-zero)
+    assertFalse(GenomicInterval("",0,1)$IsValid())      # start < stop (start == zero)
+    assertFalse(GenomicInterval("",4,5)$IsValid())      # start < stop (start > zero)
+    assertFalse(GenomicInterval("",5,4)$IsValid())      # start > stop
+})
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+compareContainers <- function(c1, c2) {
+
+    # Asserts that two indexable containers have the same length and the same
+    # element at every position.
+    #
+    #   c1, c2: containers supporting length() and `[`
+    #
+    # One assertion is recorded for the lengths and one per element of c1.
+    assertEqual(length(c1), length(c2))
+
+    # seq_len() yields an empty sequence for empty containers, whereas
+    # 1:length(c1) would iterate over c(1, 0) and index out of range.
+    for (i in seq_len(length(c1)))
+        assertEqual(c1[i], c2[i])
+}
+
+compareFrames <- function(f1, f2) {
+
+    # Compares two frame objects element-wise through the containers returned
+    # by their Data() accessors.
+    compareContainers(f1$Data(), f2$Data())
+}
+
+compareRecords <- function(b1, b2) {
+
+    # Asserts that both records report every optional field listed below as
+    # present, and that the compared fields agree between the two records.
+    #
+    #   b1, b2: record objects exposing the Has*() and accessor methods used here
+    #
+    # Returns NULL invisibly; results are recorded through the assert helpers.
+
+    # -- presence flags on the first record -- #
+    assertTrue(b1$HasDeletionQV())
+    assertTrue(b1$HasDeletionTag())
+    assertTrue(b1$HasInsertionQV())
+    assertTrue(b1$HasMergeQV())
+    assertTrue(b1$HasSubstitutionQV())
+    assertTrue(b1$HasSubstitutionTag())
+    assertTrue(b1$HasLabelQV())
+    assertTrue(b1$HasAltLabelQV())
+    assertTrue(b1$HasAltLabelTag())
+    assertTrue(b1$HasPkmean())
+    assertTrue(b1$HasPkmid())
+    assertTrue(b1$HasPulseCall())
+    assertTrue(b1$HasIPD())
+    assertTrue(b1$HasPulseWidth())
+    assertTrue(b1$HasPrePulseFrames())
+    assertTrue(b1$HasPulseCallWidth())
+    assertTrue(b1$HasPulseMergeQV())
+
+    # -- presence flags on the second record -- #
+    assertTrue(b2$HasDeletionQV())
+    assertTrue(b2$HasDeletionTag())
+    assertTrue(b2$HasInsertionQV())
+    assertTrue(b2$HasMergeQV())
+    assertTrue(b2$HasSubstitutionQV())
+    assertTrue(b2$HasSubstitutionTag())
+    assertTrue(b2$HasLabelQV())
+    assertTrue(b2$HasAltLabelQV())
+    assertTrue(b2$HasAltLabelTag())
+    assertTrue(b2$HasPkmean())
+    assertTrue(b2$HasPkmid())
+    assertTrue(b2$HasPulseCall())
+    assertTrue(b2$HasIPD())
+    assertTrue(b2$HasPulseWidth())
+    assertTrue(b2$HasPrePulseFrames())
+    assertTrue(b2$HasPulseCallWidth())
+    assertTrue(b2$HasPulseMergeQV())
+
+    # -- identity, sequence and tag strings -- #
+    assertEqual(b1$FullName(), b2$FullName())
+    assertEqual(b1$HoleNumber(), b2$HoleNumber())
+    assertEqual(b1$NumPasses(), b2$NumPasses())
+    assertEqual(b1$Sequence(), b2$Sequence())
+    assertEqual(b1$DeletionTag(), b2$DeletionTag())
+    assertEqual(b1$SubstitutionTag(), b2$SubstitutionTag())
+    assertEqual(b1$AltLabelTag(), b2$AltLabelTag())
+    assertEqual(b1$PulseCall(), b2$PulseCall())
+
+    # Container/frame comparisons are disabled (the reason is not recorded here).
+    # compareContainers(b1$Pkmean(), b2$Pkmean())
+    # compareContainers(b1$Pkmid(), b2$Pkmid())
+    #
+    # compareFrames(b1$IPD(), b2$IPD())
+    # compareFrames(b1$PulseWidth(), b2$PulseWidth())
+    # compareFrames(b1$PrePulseFrames(), b2$PrePulseFrames())
+    # compareFrames(b1$PulseCallWidth(), b2$PulseCallWidth())
+
+    assertEqual(b1$ReadGroup()$Id(), b2$ReadGroup()$Id())
+
+    # -- quality strings, compared in their FASTQ encoding -- #
+    assertEqual(b1$Qualities()$Fastq(), b2$Qualities()$Fastq())
+    assertEqual(b1$DeletionQV()$Fastq(), b2$DeletionQV()$Fastq())
+    assertEqual(b1$InsertionQV()$Fastq(), b2$InsertionQV()$Fastq())
+    assertEqual(b1$MergeQV()$Fastq(), b2$MergeQV()$Fastq())
+    assertEqual(b1$SubstitutionQV()$Fastq(), b2$SubstitutionQV()$Fastq())
+    assertEqual(b1$LabelQV()$Fastq(), b2$LabelQV()$Fastq())
+    assertEqual(b1$AltLabelQV()$Fastq(), b2$AltLabelQV()$Fastq())
+    assertEqual(b1$PulseMergeQV()$Fastq(), b2$PulseMergeQV()$Fastq())
+
+    # A bare `return` (without parentheses) is only a reference to the
+    # primitive, so the function used to hand that primitive back to callers.
+    invisible(NULL)
+}
+
+getVirtualRecord <- function(fn1, fn2) {
+
+    # Opens a ZmwReadStitcher on the two given files and returns the first
+    # record it produces.
+    #
+    #   fn1, fn2: file names handed to ZmwReadStitcher() in that order
+    #
+    # Returns the record from Next(). If anything in the block raises an
+    # error, the error is printed, a failing assertion is recorded, and NULL
+    # is returned.
+    tryCatch(
+        {
+            vpr <- ZmwReadStitcher(fn1, fn2)
+            assertTrue(vpr$HasNext())
+            vpr$Next()
+        },
+        error = function(e) {
+            print(paste('e:',e))
+            assertTrue(FALSE) # should not throw
+            # A bare `return` here used to make the handler (and so this
+            # function) yield the `return` primitive itself.
+            NULL
+        }
+    )
+}
+
+getPolymeraseRecord <- function(fn) {
+
+    # Opens `fn` as a DataSet and returns the first record of an
+    # EntireFileQuery over it.
+    #
+    #   fn: file name handed to DataSet()
+    #
+    # Returns the value at the query's begin iterator. If anything in the
+    # block raises an error, the error is printed, a failing assertion is
+    # recorded, and NULL is returned.
+    tryCatch(
+        {
+            ds <- DataSet(fn)
+            entireFile <- EntireFileQuery(ds)
+            polyIter <- entireFile$begin()
+            polyEnd <- entireFile$end()
+            # begin != end, i.e. the query yields at least one record
+            assertTrue(polyIter$'__ne__'(polyEnd))
+            polyIter$value()
+        },
+        error = function(e) {
+            print(paste('e:',e))
+            assertTrue(FALSE) # should not throw
+            # A bare `return` here used to make the handler (and so this
+            # function) yield the `return` primitive itself.
+            NULL
+        }
+    )
+}
+
+test_case("PolymeraseStitching_VirtualRegions", {
+
+    # Stitches the internal subreads + scraps files and checks the begin/end
+    # positions of every virtual region, one region type at a time.
+    subreadsFn <- paste(test_data_path, "polymerase/internal.subreads.bam", sep="/")
+    scrapsFn <- paste(test_data_path, "polymerase/internal.scraps.bam", sep="/")
+    virtualRecord <- getVirtualRecord(subreadsFn, scrapsFn)
+
+    # Checks the table for one region type: first its size, then beginPos and
+    # endPos of each entry (the table is indexed from zero).
+    expectRegions <- function(regionType, begins, ends) {
+        table <- virtualRecord$VirtualRegionsTable(regionType)
+        assertEqual(length(begins), table$size())
+        for (k in seq_len(length(begins))) {
+            region <- table$'__getitem__'(k - 1)
+            assertEqual(begins[k], region$beginPos)
+            assertEqual(ends[k], region$endPos)
+        }
+    }
+
+    # -- ADAPTER -- #
+    expectRegions('ADAPTER',
+                  c(3047L, 3650L, 4289L, 4888L, 5498L, 6116L, 6740L),
+                  c(3095L, 3700L, 4335L, 4939L, 5546L, 6173L, 6790L))
+
+    # -- BARCODE -- #
+    expectRegions('BARCODE',
+                  c(3025L, 3095L, 3628L, 3700L, 4267L, 4335L, 4864L,
+                    4939L, 5477L, 5546L, 6087L, 6173L, 6719L, 6790L),
+                  c(3047L, 3116L, 3650L, 3722L, 4289L, 4356L, 4888L,
+                    4960L, 5498L, 5571L, 6116L, 6199L, 6740L, 6812L))
+
+    # -- LQREGION -- #
+    expectRegions('LQREGION',
+                  c(0L, 7034L),
+                  c(2659L, 7035L))
+
+    # -- HQREGION -- #
+    expectRegions('HQREGION',
+                  c(2659L),
+                  c(7034L))
+})
+
+test_case("PolymeraseStitching_InternalSubreadsToOriginal", {
+
+    # Resolves a file name relative to the test data directory.
+    dataFile <- function(name) paste(test_data_path, name, sep="/")
+
+    # record stitched back together from the subreads + scraps files
+    stitched <- getVirtualRecord(dataFile("polymerase/internal.subreads.bam"),
+                                 dataFile("polymerase/internal.scraps.bam"))
+
+    # record read from the polymerase file
+    original <- getPolymeraseRecord(dataFile("polymerase/internal.polymerase.bam"))
+
+    # both are expected to agree on every field compareRecords() checks
+    compareRecords(original, stitched)
+})
+
+test_case("PolymeraseStitching_InternalHQToOriginal", {
+
+    # Resolves a file name relative to the test data directory.
+    dataFile <- function(name) paste(test_data_path, name, sep="/")
+
+    # record stitched back together from the HQ-region + LQ-region files
+    stitched <- getVirtualRecord(dataFile("polymerase/internal.hqregions.bam"),
+                                 dataFile("polymerase/internal.lqregions.bam"))
+
+    # record read from the polymerase file
+    original <- getPolymeraseRecord(dataFile("polymerase/internal.polymerase.bam"))
+
+    # both are expected to agree on every field compareRecords() checks
+    compareRecords(original, stitched)
+})
+
+test_case("PolymeraseStitching_ProductionSubreadsToOriginal", {
+
+    # Resolves a file name relative to the test data directory.
+    dataFile <- function(name) paste(test_data_path, name, sep="/")
+
+    # record stitched back together from the production subreads + scraps files
+    stitched <- getVirtualRecord(dataFile("polymerase/production.subreads.bam"),
+                                 dataFile("polymerase/production.scraps.bam"))
+
+    # record read from the production polymerase file
+    original <- getPolymeraseRecord(dataFile("polymerase/production.polymerase.bam"))
+
+    # identity, sequence and tag strings
+    assertEqual(original$FullName(), stitched$FullName())
+    assertEqual(original$HoleNumber(), stitched$HoleNumber())
+    assertEqual(original$NumPasses(), stitched$NumPasses())
+    assertEqual(original$Sequence(), stitched$Sequence())
+    assertEqual(original$DeletionTag(), stitched$DeletionTag())
+    assertEqual(original$SubstitutionTag(), stitched$SubstitutionTag())
+
+    # IPD frames (the stitched record exposes them through IPDV1Frames())
+    compareFrames(original$IPD(), stitched$IPDV1Frames())
+    assertEqual(original$ReadGroup()$Id(), stitched$ReadGroup()$Id())
+
+    # read accuracy is compared with an absolute tolerance of 1e-5 rather
+    # than for exact equality
+    accuracyGap <- abs(original$ReadAccuracy()$ToFloat() - stitched$ReadAccuracy()$ToFloat())
+    assertTrue(accuracyGap <= 1e-5)
+
+    # quality strings, compared in their FASTQ encoding
+    assertEqual(original$Qualities()$Fastq(), stitched$Qualities()$Fastq())
+    assertEqual(original$DeletionQV()$Fastq(), stitched$DeletionQV()$Fastq())
+    assertEqual(original$InsertionQV()$Fastq(), stitched$InsertionQV()$Fastq())
+    assertEqual(original$MergeQV()$Fastq(), stitched$MergeQV()$Fastq())
+    assertEqual(original$SubstitutionQV()$Fastq(), stitched$SubstitutionQV()$Fastq())
+})
+
+test_case("PolymeraseStitching_ProductionHQToOriginal", {
+
+    # Stitches the production HQ-region + scraps files into a virtual
+    # polymerase record, compares it with the stored polymerase record, and
+    # checks which optional fields each of the two records reports.
+
+    # stitch virtual polymerase record
+    hqRegionFn <- paste(test_data_path, "polymerase/production_hq.hqregion.bam", sep="/")
+    lqRegionFn <- paste(test_data_path, "polymerase/production_hq.scraps.bam", sep="/")
+    virtualRecord <- getVirtualRecord(hqRegionFn, lqRegionFn)
+
+    # fetch original polymerase record
+    polyFn <- paste(test_data_path, "polymerase/production.polymerase.bam", sep="/")
+    polyRecord <- getPolymeraseRecord(polyFn)
+
+    # compare
+    assertEqual(polyRecord$FullName(), virtualRecord$FullName())
+    assertEqual(polyRecord$HoleNumber(), virtualRecord$HoleNumber())
+    assertEqual(polyRecord$NumPasses(), virtualRecord$NumPasses())
+    assertEqual(polyRecord$Sequence(), virtualRecord$Sequence())
+    assertEqual(polyRecord$DeletionTag(), virtualRecord$DeletionTag())
+    assertEqual(polyRecord$SubstitutionTag(), virtualRecord$SubstitutionTag())
+
+    compareFrames(polyRecord$IPD(), virtualRecord$IPDV1Frames())
+    assertEqual(polyRecord$ReadGroup()$Id(), virtualRecord$ReadGroup()$Id())
+
+    # read accuracy is compared with an absolute tolerance, not exactly
+    tolerance <- 1e-5
+    assertTrue( abs(polyRecord$ReadAccuracy()$ToFloat() - virtualRecord$ReadAccuracy()$ToFloat()) <= tolerance )
+
+    assertEqual(polyRecord$Qualities()$Fastq(), virtualRecord$Qualities()$Fastq())
+    assertEqual(polyRecord$DeletionQV()$Fastq(), virtualRecord$DeletionQV()$Fastq())
+    assertEqual(polyRecord$InsertionQV()$Fastq(), virtualRecord$InsertionQV()$Fastq())
+    assertEqual(polyRecord$MergeQV()$Fastq(), virtualRecord$MergeQV()$Fastq())
+    assertEqual(polyRecord$SubstitutionQV()$Fastq(), virtualRecord$SubstitutionQV()$Fastq())
+
+    # fields the original record is expected to carry ...
+    assertTrue(polyRecord$HasDeletionQV())
+    assertTrue(polyRecord$HasDeletionTag())
+    assertTrue(polyRecord$HasInsertionQV())
+    assertTrue(polyRecord$HasMergeQV())
+    assertTrue(polyRecord$HasSubstitutionQV())
+    assertTrue(polyRecord$HasSubstitutionTag())
+    assertTrue(polyRecord$HasIPD())
+    # ... and fields it is expected to lack
+    assertFalse(polyRecord$HasLabelQV())
+    assertFalse(polyRecord$HasAltLabelQV())
+    assertFalse(polyRecord$HasAltLabelTag())
+    assertFalse(polyRecord$HasPkmean())
+    assertFalse(polyRecord$HasPkmid())
+    assertFalse(polyRecord$HasPulseCall())
+    assertFalse(polyRecord$HasPulseWidth())
+    assertFalse(polyRecord$HasPrePulseFrames())
+    assertFalse(polyRecord$HasPulseCallWidth())
+    # This slot used to repeat HasPulseCall(); HasPulseMergeQV() is the flag
+    # that closes the same list in compareRecords() and was never checked.
+    # NOTE(review): confirm the production files carry no pulse merge QVs.
+    assertFalse(polyRecord$HasPulseMergeQV())
+
+    # same expectations for the stitched record
+    assertTrue(virtualRecord$HasDeletionQV())
+    assertTrue(virtualRecord$HasDeletionTag())
+    assertTrue(virtualRecord$HasInsertionQV())
+    assertTrue(virtualRecord$HasMergeQV())
+    assertTrue(virtualRecord$HasSubstitutionQV())
+    assertTrue(virtualRecord$HasSubstitutionTag())
+    assertTrue(virtualRecord$HasIPD())
+    assertFalse(virtualRecord$HasLabelQV())
+    assertFalse(virtualRecord$HasAltLabelQV())
+    assertFalse(virtualRecord$HasAltLabelTag())
+    assertFalse(virtualRecord$HasPkmean())
+    assertFalse(virtualRecord$HasPkmid())
+    assertFalse(virtualRecord$HasPulseCall())
+    assertFalse(virtualRecord$HasPulseWidth())
+    assertFalse(virtualRecord$HasPrePulseFrames())
+    assertFalse(virtualRecord$HasPulseCallWidth())
+    assertFalse(virtualRecord$HasPulseMergeQV())
+})
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+test_case("QualityValue_Defaults", {
+
+    # A default-constructed quality value is expected to be 0, which is
+    # '!' in FASTQ encoding.
+    defaultQV <- QualityValue()
+    assertEqual(0L, defaultQV$ToInt())
+    assertEqual('!', defaultQV$Fastq())
+})
+
+test_case("QualityValue_FromNumber", {
+
+    # Values up to 93 are expected to be stored as given; larger inputs are
+    # expected to be capped at 93.
+    qv0 <- QualityValue(0)
+    qv33 <- QualityValue(33)
+    qv42 <- QualityValue(42)
+    qv93 <- QualityValue(93)     # largest value expected to survive unchanged
+    qv94 <- QualityValue(94)     # just above the cap
+    qv126 <- QualityValue(126)   # far above the cap
+
+    # numeric view
+    assertEqual(0L, qv0$ToInt())
+    assertEqual(33L, qv33$ToInt())
+    assertEqual(42L, qv42$ToInt())
+    assertEqual(93L, qv93$ToInt())
+    assertEqual(93L, qv94$ToInt())
+    assertEqual(93L, qv126$ToInt())
+
+    # FASTQ character view
+    assertEqual('!', qv0$Fastq())
+    assertEqual('B', qv33$Fastq())
+    assertEqual('K', qv42$Fastq())
+    assertEqual('~', qv93$Fastq())
+    assertEqual('~', qv94$Fastq())
+    assertEqual('~', qv126$Fastq())
+})
+
+test_case("QualityValue_FromFastq", {
+
+    # Each FASTQ character is expected to decode to the numeric value listed
+    # next to it.
+    fromBang <- QualityValue_FromFastq('!')
+    assertEqual(0L, fromBang$ToInt())
+
+    fromB <- QualityValue_FromFastq('B')
+    assertEqual(33L, fromB$ToInt())
+
+    fromK <- QualityValue_FromFastq('K')
+    assertEqual(42L, fromK$ToInt())
+
+    fromTilde <- QualityValue_FromFastq('~')
+    assertEqual(93L, fromTilde$ToInt())
+})
+
+test_case("QualityValues_Defaults", {
+    # A default-constructed container is expected to encode to an empty
+    # FASTQ string.
+    emptyQVs <- QualityValues()
+    encoded <- emptyQVs$Fastq()
+    assertEqual(0L, nchar(encoded))
+})
+
+test_case("QualityValues_FromNumbers", {
+
+    # Builds a container by appending one QualityValue per number and checks
+    # the FASTQ string it encodes to.
+    numbers <- c(93, 93, 93, 42, 42, 33, 33, 0, 0)
+    expectedFastq <- "~~~KKBB!!"
+
+    container <- QualityValues()
+    for (number in numbers) {
+        container$push_back(QualityValue(number))
+    }
+
+    assertEqual(expectedFastq, container$Fastq())
+})
+
+test_case("QualityValues_FromFastq", {
+
+    # Builds a container from a FASTQ string and checks its size and the
+    # numeric value of every element.
+    encoded <- "~~~KKBB!!"
+    expectedInts <- c(93L, 93L, 93L, 42L, 42L, 33L, 33L, 0L, 0L)
+
+    decoded <- QualityValues(encoded)
+    assertEqual(nchar(encoded), decoded$size())
+    assertEqual(length(expectedInts), decoded$size())
+
+    # the container is indexed from zero, the R vector from one
+    for (position in seq_len(length(expectedInts))) {
+        element <- decoded$'__getitem__'(position - 1)
+        assertEqual(expectedInts[position], element$ToInt())
+    }
+})
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+# main test suite runner
+run_test_suite <- function(path) {
+    # Hands every file in `path` whose name starts with "test" and ends in
+    # ".r" or ".R" to run_test_file(). Returns NULL invisibly.
+    for (test_file in dir(path, "^test.*\\.[rR]$", full.names = TRUE)) {
+        run_test_file(test_file)
+    }
+    invisible()
+}
+
+run_test_file <- function(filename) {
+    # Runs one test script by source()-ing it; the value produced by the
+    # script is discarded and NULL is returned invisibly.
+    source(file = filename)
+    invisible()
+}
+
+# main test case definition
+#
+# Example:
+# test_case("name", {
+# ...tested code here...
+# })
+#
+test_case <- function(name, code) {
+    # Captures `code` unevaluated so that the runner decides when and where
+    # it is executed. Returns NULL invisibly.
+    body <- substitute(code)
+    test_case_runner(name, body)
+    invisible()
+}
+
+# main assert definitions
+#
+# assertEqual(expected, actual)
+# assertNotEqual(expected, actual)
+# assertTrue(expr)
+# assertFalse(expr)
+#
+
+assertEqual <- function(expected, actual) {
+    # Records a passing result when identical(expected, actual) is TRUE.
+    # Both the source expressions and the deparsed values are passed along
+    # for the failure report.
+    assertHelper(compare        = identical(expected, actual),
+                 to             = TRUE,
+                 expected_expr  = expression_string("expected"),
+                 expected_value = deparse(expected),
+                 actual_expr    = expression_string("actual"),
+                 actual_value   = deparse(actual),
+                 compare_type   = "==")
+}
+
+assertAllEqual <- function(expected, actual) {
+    # Like assertEqual(), but compares with all.equal(); the assertion
+    # passes only when all.equal() returns exactly TRUE.
+    assertHelper(compare        = all.equal(expected, actual),
+                 to             = TRUE,
+                 expected_expr  = expression_string("expected"),
+                 expected_value = deparse(expected),
+                 actual_expr    = expression_string("actual"),
+                 actual_value   = deparse(actual),
+                 compare_type   = "==")
+}
+
+assertNotEqual <- function(expected, actual) {
+    # Records a passing result when identical(expected, actual) is FALSE.
+    assertHelper(compare        = identical(expected, actual),
+                 to             = FALSE,
+                 expected_expr  = expression_string("expected"),
+                 expected_value = deparse(expected),
+                 actual_expr    = expression_string("actual"),
+                 actual_value   = deparse(actual),
+                 compare_type   = "!=")
+}
+
+assertTrue <- function(expr) {
+    # Records a passing result when as.vector(expr) is identical to TRUE.
+    # The "expected" side of the report is the constant "TRUE".
+    assertHelper(compare        = as.vector(expr),
+                 to             = TRUE,
+                 expected_expr  = "TRUE",
+                 expected_value = "TRUE",
+                 actual_expr    = expression_string("expr"),
+                 actual_value   = deparse(expr),
+                 compare_type   = "==")
+}
+
+assertFalse <- function(expr) {
+    # Records a passing result when as.vector(expr) is identical to FALSE.
+    # The "expected" side of the report is the constant "FALSE".
+    assertHelper(compare        = as.vector(expr),
+                 to             = FALSE,
+                 expected_expr  = "FALSE",
+                 expected_value = "FALSE",
+                 actual_expr    = expression_string("expr"),
+                 actual_value   = deparse(expr),
+                 compare_type   = "==")
+}
+
+# TODO: (if needed) assertLessThan, assertGreaterThan, assertNull, etc
+
+# ------------------------------- #
+# internals
+# ------------------------------- #
+
+expression_string <- function(name, env = parent.frame()) {
+    # Returns, as a single string, the source expression bound to the
+    # variable called `name` in `env` (by default the caller's frame).
+    # Multi-line deparse output is joined with newlines.
+    target <- as.name(name)
+    unevaluated <- do.call("substitute", list(target, env))
+    pieces <- deparse(unevaluated, width.cutoff = 500)
+    paste0(pieces, collapse = "\n")
+}
+
+assertHelper <- function(compare,
+                         to,
+                         expected_expr,
+                         expected_value,
+                         actual_expr,
+                         actual_value,
+                         compare_type)
+{
+    # Shared back end of the assert* functions: the assertion passes when
+    # `compare` is identical to `to`. The outcome, together with the
+    # expression/value strings used for reporting, is added to the collector
+    # of the currently running test case. Returns NULL invisibly.
+    passed <- identical(compare, to)
+
+    outcome <- make_assert_result(success        = passed,
+                                  expected_expr  = expected_expr,
+                                  expected_value = expected_value,
+                                  actual_expr    = actual_expr,
+                                  actual_value   = actual_value,
+                                  compare_type   = compare_type)
+
+    # record result with testCaseCollector
+    collector <- test_case_results()
+    collector$add_result(outcome)
+    invisible()
+}
+
+make_assert_result <- function(success,
+                               expected_expr,
+                               expected_value,
+                               actual_expr,
+                               actual_value,
+                               compare_type)
+{
+    # Bundles the outcome of one assertion into a plain named list; the
+    # element names are the ones the result formatter reads.
+    record <- list(
+        success = success,
+        expectedExpression = expected_expr,
+        expectedValue = expected_value,
+        actualExpression = actual_expr,
+        actualValue = actual_value,
+        compareType = compare_type
+    )
+    record
+}
+
+TestCaseResults <- setRefClass("TestCaseResults",
+
+    # Collects the assertion results of the test case that is currently
+    # running and reports a formatted summary to the suite-level collector
+    # when the test case ends.
+    fields = list(
+        test = "character",      # name of the running test case
+        anyFailed = "logical",   # TRUE once a failed assertion was recorded
+        results = "list"         # assertion result records, in arrival order
+    ),
+    methods = list(
+
+        # Starts with an empty name and no failure; field values supplied by
+        # the caller are applied afterwards through initFields().
+        initialize = function(...) {
+            test <<- ""
+            anyFailed <<- FALSE
+
+            initFields(...)
+        },
+
+        # Sets the test name and discards previously collected results.
+        start_test = function(name) {
+            test <<- name
+            results <<- list()
+        },
+
+        # Appends one assertion result; a failed one marks the whole test.
+        add_result = function(result) {
+            if (!result$success) {
+                anyFailed <<- TRUE
+            }
+            results <<- c(results, list(result))
+        },
+
+        # Reports the formatted results to the suite collector, then resets
+        # this object for the next test case.
+        end_test = function() {
+
+            report <- format_results()
+            test_suite_results()$add_test_case_result(anyFailed, report)
+
+            test <<- ""
+            anyFailed <<- FALSE
+            results <<- list()
+        },
+
+        # Returns (invisibly) a list of output lines: one header line with
+        # the test name and status, followed by a block for every failed
+        # assertion.
+        format_results = function() {
+
+            status <- if (anyFailed) "FAILED" else "OK"
+            lines <- list(paste("TestCase:", test, "...", status, sep=" "))
+
+            for (result in results) {
+                if (result$success)
+                    next
+                comparison <- paste(result$actualExpression, result$compareType, result$expectedExpression, sep=" ")
+                lines <- c(lines,
+                           paste(" Value of:", comparison, sep=" "),
+                           paste(" Actual:", result$actualValue, sep=" "),
+                           paste(" Expected:", result$expectedValue, sep=" "),
+                           "")
+            }
+            invisible(lines)
+        }
+    )
+)
+
+TestSuiteResults <- setRefClass("TestSuiteResults",
+
+    # Accumulates the outcome of every finished test case and prints the
+    # final report.
+    fields = list(
+        numTests = "integer",    # test cases reported so far
+        numFailed = "integer",   # test cases reported as failed
+        results = "list"         # formatted output lines of all test cases
+    ),
+    methods = list(
+
+        # Starts with zero counters and no output; field values supplied by
+        # the caller are applied afterwards through initFields().
+        initialize = function(...) {
+            numTests <<- 0L
+            numFailed <<- 0L
+            results <<- list()
+
+            initFields(...)
+        },
+
+        # Counts one finished test case and appends its output lines.
+        add_test_case_result = function(testHasFailures, testOutput) { #(results)
+            numTests <<- numTests + 1L
+            if (testHasFailures) {
+                numFailed <<- numFailed + 1L
+            }
+            results <<- c(results, testOutput)
+        },
+
+        # TRUE when at least one reported test case failed.
+        any_failed = function() {
+            numFailed != 0L
+        },
+
+        # Writes a banner, every collected output line, and a footer with
+        # the failure count to standard output.
+        print_summary = function(...) {
+
+            cat("\n",
+                "-------------------------\n",
+                "Tests Complete\n",
+                "-------------------------\n",
+                "\n",
+                sep = "")
+
+            for (result in results) {
+                cat(result)
+                cat("\n")
+            }
+            cat("-------------------------\n")
+
+            # singular wording only for exactly one failure
+            wording <- if (numFailed == 1L) "test failed out of" else "tests failed out of"
+            cat(paste(numFailed, wording, numTests, sep=" "))
+            cat("\n\n")
+        }
+    )
+)
+
+# Shared environment: holds the suite-level and case-level collectors, and is
+# also the environment test_case_runner() evaluates test bodies in.
+test_env <- new.env()
+test_env$testCaseResults <- TestCaseResults$new()
+test_env$testSuiteResults <- TestSuiteResults$new()
+
+test_suite_results <- function() {
+    # Accessor for the suite-level collector stored in test_env.
+    get("testSuiteResults", envir = test_env, inherits = FALSE)
+}
+
+test_case_results <- function() {
+    # Accessor for the case-level collector stored in test_env.
+    get("testCaseResults", envir = test_env, inherits = FALSE)
+}
+
+test_case_runner <- function(name, code) {
+    # Runs one test case: announces it to the case-level collector,
+    # evaluates the captured test body inside test_env, then closes it.
+    #
+    #   name: test case name used in the report
+    #   code: unevaluated test body (as captured by test_case())
+    collector <- test_case_results()
+    collector$start_test(name)
+    eval(code, envir = test_env)
+    collector$end_test()
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef TESTDATA_H
+#define TESTDATA_H
+
+#include <string>
+
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
+// Paths used by the test suite. Every "@...@" token is a placeholder that is
+// expected to be replaced when this template is configured (presumably by
+// CMake's configure_file -- confirm against the build scripts).
+const std::string Source_Dir{"@PacBioBAM_TestsDir@"};
+const std::string Bin_Dir{"@CMAKE_CURRENT_BINARY_DIR@"};
+const std::string Data_Dir{"@PacBioBAM_TestsDir@/data"};
+const std::string Generated_Dir{"@GeneratedDir@"};
+const std::string GeneratedData_Dir{"@GeneratedTestDataDir@"};
+const std::string Bam2Sam{"@PacBioBAM_BinDir@/bam2sam"};
+
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
+#endif // TESTDATA_H
--- /dev/null
+Setup:
+
+ $ BAM2SAM="@PacBioBAM_BinDir@/bam2sam" && export BAM2SAM
+
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
+
+Normal:
+
+ $ $BAM2SAM < $DATADIR/phi29.bam | head -n 5
+ @HD\tVN:3.0.0\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\tPU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0 (esc)
+ @PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0 (esc)
+ @PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2 (esc)
+ m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/2067_4072\t4\t*\t0\t255\t*\t*\t0\t0\tAAGTCATGTATAGAGTTATTGGCTCAGCGGTGGCAAGCAGCCAACTCAGCTCCTTTCGGGCTTGTTAGCAGCCGGATCCACACTCTGAAATTCCTGCAGCTCGAGTTATTTGATAGTAAAAGTGGGTCATCAAACCGCAACTACGCCACCCCGGTACCTGAACAGGCTTCGGTTTCATTTTGAGACGAGAAAAACCCACTTTGAAGTTTTCGAAAATCACTTCCTTTTTTGATTTTGTCCGTCATGCCTGCGCATTTCACAAGAGAACTTGATGTCAGTGTTAGTCGTCAGGAGAGCCTCTACCAGTTTGCCGTCTACTTCTTTCATGTAAATATCCTGGATGTAAGGTTTTCTGACGCAGATATTGTTGGCAACGCTTAAAAAGTGGGATTCGTGTGGCCAGTAGCCCAGGTTATCTTTCGGGTCCACGATGTCTTTGATAACGTCCAGAATTTCAGTACCAGTTCAGGTGAATAGAACGGTATCGCAGTTTCAGATAATACGATCATGACAGCCTGCGCTGGCTGTGATGGTCGTATAGCGTGCCCAGGCCGTGATAAAGACACCCATCCGGGGTATATACAGGGTCTTTCGTTTCCTCCCTAGCCTAAGAAAGAAAACCCAGAGCACCGTTCTCTTCAAGGTATGGCACTTTACCGTAACGTCCGGGTTGGAAAGCGAAATTTACCGTACAGGCCTGGTTCAGCATCAGCTTCCCGCCAGCTGTTTGATGGCGTCCCTTCAGAGGTAGTTTTGATATACGTCCATTTTGTCGATAAAGTCCTTGAACAGGCAAGTGGTTCCTTTGAAACTTCAGACCAAGAGGATAATATTCAACATTGTACAGGTCGTAGTGTCTTTCATCAGTTCCAGATCAAACATTAGAACAGCCACAAGGTCAGCAAATTTCACCGCCGGAAGATTTCAGGTAATTCGTTACGCTTGTAAAGAAATTGGGACGCTTGATCTGGGATGGTCGGGATCTAGCCTCCTTCAGTTCAAATTCACTTAACGAATGTGCTGAATGTGCCAGAGGGTAATCCTCGTCCAAAGACGGTATTTACCCTCGAAACGATCGGCTCGCCGTACGGCAACGCAGGCGAGAGTACATTCTGCAGCTGGGTACAGGGAATTAAACATCAAAAACCATACCTTCGCCGATCTCTTTTTCCTTTAAAACGTCATTCAGCCACGGTGAAGACCAACCACGGTAAGGCATAACGAACTTCCTTGTCCAGACCCAGGCTCAGGGTCCGGGAAAACCTTTTTAAACTTCTTGTGGTGATAATGTCTTTGAAGCCTTTCAGAAGTCAGAACCTGCCGTCATACGATCCAAGACCACTGCTTGAACCTGGATCAGCAGACTTCTGCGATAATCTGAATATCGTTTTTAATGTAGGCAATATTTCTTCCGGGTGATTTTGTAACCGACCGGGCGCTCATTATGATAAGTCGATGTCGCCTTTCAGTAGCCGTCAGTTAAAAGTCTTTCGCAATTTTCTTAACGCGGAAAGGCAAGTTTCTAGAGAGTCGTAAGATCACGTGTGGAATCTTGCGTTTACCCTTGTAACCCAGGCAATACAATCATATAACCACTGGCCCAATGCGAGAAATGATGGTGTTGTAGGTATTTTGGCAGACCATCTGCGGACCGCATTTAAAACGTTACGTTCCAGCCAGTTGATGATGAAATGCGCCATCAAAATTTCAGATTTGTGGAAGTACAGGTCCCAGCCTGAAAACTTTGCAGAAACCCAAGCCATAAAATTCATCCAGGGAGTTACCAATCTTATACTCGGAGTGGTCTTCGATGTTCATTGTAACCGTATGCCCATACGCGGCAATCTTCAAGCCTTGGTGTAGTCTCAAAGTCGCAGGAATACATTTACGTGG
CATGTGTTTCATATGTCATTCCTTCTTAAAGTTAAACAAAATTATTTCTAGAGGGGAATTGTTTATCCGCCTCACATTTCCCTATAGTGAGTCGTATTAATTTCGCGGGATC\t*\tRG:Z:a955def6\tbc:B:S,1,1\tbq:i:1\tcx:i:31\tnp:i:1\tqe:i:4072\tqs:i:2067\trq:f:0.88458\tsn:B:f,22.8448,13.8689,14.6461,14.3552\tzm:i:30422 (esc)
+
+Explicit Filename (not stdin):
+
+ $ $BAM2SAM $DATADIR/phi29.bam | head -n 5
+ @HD\tVN:3.0.0\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\tPU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0 (esc)
+ @PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0 (esc)
+ @PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2 (esc)
+ m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/2067_4072\t4\t*\t0\t255\t*\t*\t0\t0\tAAGTCATGTATAGAGTTATTGGCTCAGCGGTGGCAAGCAGCCAACTCAGCTCCTTTCGGGCTTGTTAGCAGCCGGATCCACACTCTGAAATTCCTGCAGCTCGAGTTATTTGATAGTAAAAGTGGGTCATCAAACCGCAACTACGCCACCCCGGTACCTGAACAGGCTTCGGTTTCATTTTGAGACGAGAAAAACCCACTTTGAAGTTTTCGAAAATCACTTCCTTTTTTGATTTTGTCCGTCATGCCTGCGCATTTCACAAGAGAACTTGATGTCAGTGTTAGTCGTCAGGAGAGCCTCTACCAGTTTGCCGTCTACTTCTTTCATGTAAATATCCTGGATGTAAGGTTTTCTGACGCAGATATTGTTGGCAACGCTTAAAAAGTGGGATTCGTGTGGCCAGTAGCCCAGGTTATCTTTCGGGTCCACGATGTCTTTGATAACGTCCAGAATTTCAGTACCAGTTCAGGTGAATAGAACGGTATCGCAGTTTCAGATAATACGATCATGACAGCCTGCGCTGGCTGTGATGGTCGTATAGCGTGCCCAGGCCGTGATAAAGACACCCATCCGGGGTATATACAGGGTCTTTCGTTTCCTCCCTAGCCTAAGAAAGAAAACCCAGAGCACCGTTCTCTTCAAGGTATGGCACTTTACCGTAACGTCCGGGTTGGAAAGCGAAATTTACCGTACAGGCCTGGTTCAGCATCAGCTTCCCGCCAGCTGTTTGATGGCGTCCCTTCAGAGGTAGTTTTGATATACGTCCATTTTGTCGATAAAGTCCTTGAACAGGCAAGTGGTTCCTTTGAAACTTCAGACCAAGAGGATAATATTCAACATTGTACAGGTCGTAGTGTCTTTCATCAGTTCCAGATCAAACATTAGAACAGCCACAAGGTCAGCAAATTTCACCGCCGGAAGATTTCAGGTAATTCGTTACGCTTGTAAAGAAATTGGGACGCTTGATCTGGGATGGTCGGGATCTAGCCTCCTTCAGTTCAAATTCACTTAACGAATGTGCTGAATGTGCCAGAGGGTAATCCTCGTCCAAAGACGGTATTTACCCTCGAAACGATCGGCTCGCCGTACGGCAACGCAGGCGAGAGTACATTCTGCAGCTGGGTACAGGGAATTAAACATCAAAAACCATACCTTCGCCGATCTCTTTTTCCTTTAAAACGTCATTCAGCCACGGTGAAGACCAACCACGGTAAGGCATAACGAACTTCCTTGTCCAGACCCAGGCTCAGGGTCCGGGAAAACCTTTTTAAACTTCTTGTGGTGATAATGTCTTTGAAGCCTTTCAGAAGTCAGAACCTGCCGTCATACGATCCAAGACCACTGCTTGAACCTGGATCAGCAGACTTCTGCGATAATCTGAATATCGTTTTTAATGTAGGCAATATTTCTTCCGGGTGATTTTGTAACCGACCGGGCGCTCATTATGATAAGTCGATGTCGCCTTTCAGTAGCCGTCAGTTAAAAGTCTTTCGCAATTTTCTTAACGCGGAAAGGCAAGTTTCTAGAGAGTCGTAAGATCACGTGTGGAATCTTGCGTTTACCCTTGTAACCCAGGCAATACAATCATATAACCACTGGCCCAATGCGAGAAATGATGGTGTTGTAGGTATTTTGGCAGACCATCTGCGGACCGCATTTAAAACGTTACGTTCCAGCCAGTTGATGATGAAATGCGCCATCAAAATTTCAGATTTGTGGAAGTACAGGTCCCAGCCTGAAAACTTTGCAGAAACCCAAGCCATAAAATTCATCCAGGGAGTTACCAATCTTATACTCGGAGTGGTCTTCGATGTTCATTGTAACCGTATGCCCATACGCGGCAATCTTCAAGCCTTGGTGTAGTCTCAAAGTCGCAGGAATACATTTACGTGG
CATGTGTTTCATATGTCATTCCTTCTTAAAGTTAAACAAAATTATTTCTAGAGGGGAATTGTTTATCCGCCTCACATTTCCCTATAGTGAGTCGTATTAATTTCGCGGGATC\t*\tRG:Z:a955def6\tbc:B:S,1,1\tbq:i:1\tcx:i:31\tnp:i:1\tqe:i:4072\tqs:i:2067\trq:f:0.88458\tsn:B:f,22.8448,13.8689,14.6461,14.3552\tzm:i:30422 (esc)
+
+Header-Only:
+
+ $ $BAM2SAM --header-only < $DATADIR/phi29.bam | head -n 5
+ @HD\tVN:3.0.0\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\tPU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0 (esc)
+ @PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0 (esc)
+ @PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2 (esc)
+
+No-Header:
+
+ $ $BAM2SAM --no-header < $DATADIR/phi29.bam | head -n 5
+ m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/2067_4072\t4\t*\t0\t255\t*\t*\t0\t0\tAAGTCATGTATAGAGTTATTGGCTCAGCGGTGGCAAGCAGCCAACTCAGCTCCTTTCGGGCTTGTTAGCAGCCGGATCCACACTCTGAAATTCCTGCAGCTCGAGTTATTTGATAGTAAAAGTGGGTCATCAAACCGCAACTACGCCACCCCGGTACCTGAACAGGCTTCGGTTTCATTTTGAGACGAGAAAAACCCACTTTGAAGTTTTCGAAAATCACTTCCTTTTTTGATTTTGTCCGTCATGCCTGCGCATTTCACAAGAGAACTTGATGTCAGTGTTAGTCGTCAGGAGAGCCTCTACCAGTTTGCCGTCTACTTCTTTCATGTAAATATCCTGGATGTAAGGTTTTCTGACGCAGATATTGTTGGCAACGCTTAAAAAGTGGGATTCGTGTGGCCAGTAGCCCAGGTTATCTTTCGGGTCCACGATGTCTTTGATAACGTCCAGAATTTCAGTACCAGTTCAGGTGAATAGAACGGTATCGCAGTTTCAGATAATACGATCATGACAGCCTGCGCTGGCTGTGATGGTCGTATAGCGTGCCCAGGCCGTGATAAAGACACCCATCCGGGGTATATACAGGGTCTTTCGTTTCCTCCCTAGCCTAAGAAAGAAAACCCAGAGCACCGTTCTCTTCAAGGTATGGCACTTTACCGTAACGTCCGGGTTGGAAAGCGAAATTTACCGTACAGGCCTGGTTCAGCATCAGCTTCCCGCCAGCTGTTTGATGGCGTCCCTTCAGAGGTAGTTTTGATATACGTCCATTTTGTCGATAAAGTCCTTGAACAGGCAAGTGGTTCCTTTGAAACTTCAGACCAAGAGGATAATATTCAACATTGTACAGGTCGTAGTGTCTTTCATCAGTTCCAGATCAAACATTAGAACAGCCACAAGGTCAGCAAATTTCACCGCCGGAAGATTTCAGGTAATTCGTTACGCTTGTAAAGAAATTGGGACGCTTGATCTGGGATGGTCGGGATCTAGCCTCCTTCAGTTCAAATTCACTTAACGAATGTGCTGAATGTGCCAGAGGGTAATCCTCGTCCAAAGACGGTATTTACCCTCGAAACGATCGGCTCGCCGTACGGCAACGCAGGCGAGAGTACATTCTGCAGCTGGGTACAGGGAATTAAACATCAAAAACCATACCTTCGCCGATCTCTTTTTCCTTTAAAACGTCATTCAGCCACGGTGAAGACCAACCACGGTAAGGCATAACGAACTTCCTTGTCCAGACCCAGGCTCAGGGTCCGGGAAAACCTTTTTAAACTTCTTGTGGTGATAATGTCTTTGAAGCCTTTCAGAAGTCAGAACCTGCCGTCATACGATCCAAGACCACTGCTTGAACCTGGATCAGCAGACTTCTGCGATAATCTGAATATCGTTTTTAATGTAGGCAATATTTCTTCCGGGTGATTTTGTAACCGACCGGGCGCTCATTATGATAAGTCGATGTCGCCTTTCAGTAGCCGTCAGTTAAAAGTCTTTCGCAATTTTCTTAACGCGGAAAGGCAAGTTTCTAGAGAGTCGTAAGATCACGTGTGGAATCTTGCGTTTACCCTTGTAACCCAGGCAATACAATCATATAACCACTGGCCCAATGCGAGAAATGATGGTGTTGTAGGTATTTTGGCAGACCATCTGCGGACCGCATTTAAAACGTTACGTTCCAGCCAGTTGATGATGAAATGCGCCATCAAAATTTCAGATTTGTGGAAGTACAGGTCCCAGCCTGAAAACTTTGCAGAAACCCAAGCCATAAAATTCATCCAGGGAGTTACCAATCTTATACTCGGAGTGGTCTTCGATGTTCATTGTAACCGTATGCCCATACGCGGCAATCTTCAAGCCTTGGTGTAGTCTCAAAGTCGCAGGAATACATTTACGTGG
CATGTGTTTCATATGTCATTCCTTCTTAAAGTTAAACAAAATTATTTCTAGAGGGGAATTGTTTATCCGCCTCACATTTCCCTATAGTGAGTCGTATTAATTTCGCGGGATC\t*\tRG:Z:a955def6\tbc:B:S,1,1\tbq:i:1\tcx:i:31\tnp:i:1\tqe:i:4072\tqs:i:2067\trq:f:0.88458\tsn:B:f,22.8448,13.8689,14.6461,14.3552\tzm:i:30422 (esc)
+ m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/4151_6183\t4\t*\t0\t255\t*\t*\t0\t0\tGATCCCGCGAATTAATTACGACTCACTATAGGGGAATTGTGAGCGGATAACAATTCCCGCCTCTAGAAATAATTTTGTTTAAACTTTTAAGAAAGGAGATATTACATATGAAACACAGCCACGTAAAATGTATTCCTGCGACTTGGAGACTACCACCAAGGTGAAGATTTGCCGCGTAATGGGCATACGGTTTACATGAAACATCGAAGAACAAACTCGAGTATAAGATTGGTAACTCCCCTGGATGAATTATGGCTTGGGTTACTGAAAGTTCGAGGTCTGACCTGTACTTCGCACAAATCTGAAAATTTGATGGCCGCAAATTTCAATTCATCACTGGCTGGAACGTAAACGGTTTTAAATGGTCCGCAGATCGGTCTGTGCCAAATACCCTGATCAACACATCATTTCTTCGCAATGCGGCCAGTGTAATGATTGATATCTTGCCCTGGGTTGACAAGGGGTAAACGCAAGATCCACACCTGTGATCTACGACCTCTCTGAAGAAAACTGCGTTTCCGGTTAAGAAAATTGCGAAAGACTTTAAGCTGAACGGTACTGAAAAGCGACATGACTATCATAATGAGCGCCCGGTCGTTACAAAATCACCCCGGAAAGAATATGCCTACATTTAAAAACGATATTCAGATTATCGCAAGAACTCTGCTGATCAGTTCAAGCAAGGGTCTGGATCGTAAATGACGGCAGGTTCTGACTCTCCTGAAAGGCTTCAAAAGACATTATCACCACCTAAAAGAAGTTTAAAAAGGTTTTTTCACCGACCCTGAGCCTAGGGCTGGACAAGGAAAGTTGTTAATGCCCATACCGTGGTGGTTTCACCTGGCTGAAAGACCGTTTTAAAGAAAAAGAGATCGGCGAAGGTATGGTTTTTGATGTTAATTCCCTGTAACCAAGCCTTCAATGTACTCTCGCCTGCTTGCCGTCACACGGGCGAGCGACGTATTCGAAAGGGTAAAATACGTTCTGGGACGGAGGATTTACCCTCTGCAATTCGGCACATTCCGTTGTGAATTTGGAACTGAAAGGAAGGCTTAGATCCCGACCATCCCAGATCAAGCGTTCCCATTTCTAACAAAGGGTAACGAATACCTGAAATCTTCCAGGCGGTGAAATTGCTGACCTGTGGCTGTCTAAATGTTTGATCTTGGAAACTGATGAAAGAGCACTACGACCTGGTACAATGTTGAATATATCTCTGGTCTGAAGTTCAAAGCAACCACTGGCCTGTTCAAGGACTTTATCGACAATGGACGTATATCAAAAGACTACCTCTGAAGACGCCATCAAACAGCTGGCGAAGCTGATGCTGACAAGCCTGTACGGTAAATTCGCGTCCCACCCGGACGTTTACCGGGTAAAGTGCCATATGCTGAAAGAGAAAGCGGTGCTCTGGTTTTTCGTCTAGGTGGAAGGAGGAAACGAAGACACTGTATATACCGCCGAATGGGTGTCTTTATCCAAGCGGCCTGGCACGCTATACGACCATCACAGGCAAGCGCAGGCTTTTGTTAATGATCGTATTATCTACTGCGATTACCGATTCTACTTCACTGACTGGTACTGAAATCTGGACGTTATCAAAGACATCGTAGACCCGAAGAAACTGGGCTACTGGCACACGAATTCCACTTTAAGCGTGCAAAATATCTGCGTCAGAAAACCTACATCCCAGGATATTTACATGAAAGAAGTAGACGGCAAACTGGTAGAGGGCTCTCCGTGACGACTACACTGACATCAAGTTCTCTGTGAAATGCGCAGGCAAATGACGGCACAAAATCCAAAAAAGGAAGTGACTTTCGAAAACTTCAAAAGTGGGTTCTCGTAAAATGAAACCGAAAGCTGTTCAGGTTTAAACCCGGGTGGCGTAGT
GCCTGGTTGATGAACACTTTTTACTATCAAAATAACTTCGAAAGCTGCAGGAATTCAAGCTGATCCGGCTGCTAACAAAGCCCGAAGGAAGCTGAGTTGGCTGCTGCCACCGTGAGCAATACTCTAAATACATGACTCT\t*\tRG:Z:a955def6\tbc:B:S,1,1\tbq:i:1\tcx:i:31\tnp:i:1\tqe:i:6183\tqs:i:4151\trq:f:0.88458\tsn:B:f,22.8448,13.8689,14.6461,14.3552\tzm:i:30422 (esc)
+ m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/6234_8214\t4\t*\t0\t255\t*\t*\t0\t0\tAGAGTCATGTATAAGAGTTATTGCTCAGCGGTGGCAGCAGACAACTCAGCTTCCTTTCGGGCCTTTGTTAGCAGCCGGATCCAAGCTTGAATTCCTGCAAGCTCGAGTTATTTGATAGTAAAAGTGTCATCAAACCAGCACTACGGCCGAACCCGGTACCTGAACAGATTCGTTTCATTTTACGAGAAAAACCCACTTTGAAGTTTTGCCGAAAGTCACTTCTTTTTGATTTGTCCGTCATGCTGCGCATTTCACAGAGACTTGAATGTCAGTGTAGTCGTCATCGGGGGGGGGAAGAGCCCTCTACCAGTTTTGCCGTCTACTTCTTTCATGTAAATATCTGGATGTAGGTTTTCTGAACGCAGATATTTGCAGCTTAAAAGTGGATTCGTGTGCCCAGTAGCCCGTTTTCTTCGGGTCCTACGATGTCTTTTGATAACGTCCAGAATTTCAGTACCAGTCAGGTGAATAGAATCGGTATCGCAGTAGATAAATACGATCATAACAAGCCTGCGCTGCCTGTTGATGGTCGTATAGCGTGCCCAGGCCCGTGATAAAGAACCATCGGGGTATATAACAGGGTCTTTCGTTCCTCCTCACCTAGACGAAAAACCCAGAGCACCGTTCTCTTTCAAGGTATGGCCTTTACCGGTAACGTCCGGGTTGGACGCGAATTTAAGCCGTAACAGGCTGTCTCAGCATACAGCTTTCGCCAGCCTGTTTGATGGCCGTCTTCAGAGGTAGTTTTGATATACGTTCCATTTGTCGATAAAGTCCCTTGAGCAGGCCCAGTGGGTTGGCTTTGAACTCAGACGCAGAATATATTCAACATTGTAACAGGTCGTAGTGCTCTTTCATCAGTTCAGATTCAACATTAGACAAGCCACAGGTCAGCATTTCACCGCCGGGAAGAATTTCAAGGTATTCGTTTACCCTTGGTAGAAATGGAACGCTTGTAATCTGGATGGTCGGGATCTAGCCTTTTCAGTTCAAAATTCACACGAATGTGCTGAATGTGCAGAGGGTAATACCTCGTCCCAGACGTATTTACCCTCGAATAAGCGAATCGGCTCGCCGTATCGCAGCAGGCGAGAGTAAACATTTGAGCTGGGTAACAGGGAATTACATCCAAAACCATACCCTTTCGCACGATCTCTTTTTCTTTAAAACGTCATTCAGCCAGGTGAAACCACCAGGTAGGCATAACGAACTTCCTGTCCAGACCCAAGGCTCAGGTCGGGAAAACTTTTTAAACTTCCTTGTGGTGATAATGTCTTTTGAAGCTTTCAGAGAGTCAGAACCTGCGTCATACGATCCAGACCGCTGCTTGAAGCTGGATCAGCAAGCTTCTGCGATAATCTGAATATCGTTTATTAATTAGGCATATTCTTCCGGGTGATTTTGTAACCGACCGGGCGCTCATTATGATAGTCGATGTCGCCCCTTTCAAGTACCGTCAGCTAAAGTCTTTCGCATTTTCTTACCGGAAACGGCAGTTTCTTCAGAGAGTCGTAAGATCACGTGTGGATCTTGCGTTTACCGCTTGTAACCCAGGCAAATATCAATCATATACCACTGGCCATGCGAGAAATGATGGTGTTGTAGGTTATTGGCAGACCATCTGCGGACCATTTTAAAACCGTTACGTTCAGCCAGTTGATGAATGAATGCGCCATGCAAATTTCAGATTGTGGAAGTACAAGGTCAGCCTGACTTTCAGAACCCAAAGCCATAAATTCATCCAGGGAGTTACCATCTTATACTCCGGAGTTGGTCTTCGATGTTCATGTAACCGTATGCCCATACGCGGCAAATCTTCAACCTTGGTGTGTAGTCCTCAAAGTCGCAGGAATACATTTACGTGGCATGTGTTTTCATAATGTATATCTCCTT
CTTAAAGTTAAACAAAATTATTTCTAGAGGGGAATTGTTATCCCGCTCACAATCCCCTATCAGTGAGTCGTATTAATTTCGCGGATC\t*\tRG:Z:a955def6\tbc:B:S,1,1\tbq:i:1\tcx:i:31\tnp:i:1\tqe:i:8214\tqs:i:6234\trq:f:0.88458\tsn:B:f,22.8448,13.8689,14.6461,14.3552\tzm:i:30422 (esc)
+ m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/8294_10277\t4\t*\t0\t255\t*\t*\t0\t0\tGATTCCCGCGAAATTAATACGAATCACTATAAGGGGAATTGTGAGCGGATAACAATTCCCCTCTAGAAATAATTTTGTTTAACTTTAAGAGGGACGATATACATATGAACACATGCCTACGTAAAATGTATTCCTGCGAACTGTTGAGACTACCACCAAGGTTGAAGATTTGCCGCGTAATGGGCATACGGTTACATGAACATCGAAGACCACTCCGATATGAAGATTGGTTAACCCCTGGATGAATTTATGGCTTGGGTTCTGAAAGTTCAGGCTGACCTGTACTTCACAATCTGAAATTTGATGGCCGCATTCATCAATCACTGGCTGGAACGTAAAACGGTTTAAAAATGGTCCCGCAGATGGTCTGACAAATTAACTACAACACCATCATTTCTCGCATGGGCCCAGTGGTATATGAAATTGATATTTGCCTGGGTTACAAGGAGGTAAACGCAAGATCCACACGTGGATCTACGACTCTTCTGAAGAAACCTGGCCGTTTCCGTTAAGAAAATGCGAAAGAACTTAAGCTGACGGTAACTGAAAGGCGACATCGACTATCATATAATGAAGCGCCCGTCGTTACAAAATCACCCCGGAAGAATATGCCTTACATTAAAAAACGATATTCAGATTTCGCAGAAGCTCTGCTGATCCAGTTCAAAGCAGGGTCCTGGATCGTAATGACGGCAGGTTCTGACTCTCTGAAAGGCTTCAAAGAACATTATCACCCACCAAGAAGTTTAAAAAGGTTTTCCCGACAACTGAGCCTGGGTCTGGACAAGGAAGTTTCGTTTGCCTACCGTGGTGGTTTTCAACCTGCTGACTGAACCGTTTTAAAAGAAAATAGAGATCGGCGGAAAGGTATGGTTTTTGATGTTAATTCCTGTAACCAGCCTCAAAATGTACTCTCGCCTGCTGCCGTACGGCGGCCGATCGTATTCGAAGGGTAAATACGTCTGGGACCGAGGATAGCCCTCTGCACATTCAGCACATTCGTTGTGAAATTTGAACTGAAGGAAGCTGATCCCGACGCATCCAGATCAAGCGTTCCCATTTTCTACAAGGTAACGAATACCTGAAATCTTCCCGGCGGTGAAATTGCTGCCTGTGGCTGTCTAATGTTGATCTGGAAACTGATGAAAGAGCACTACGAGACCTGTACAATGTTGAATATATCTCTGGTCTGAAGTTCAAAGCAACCACTGGCCTGTTCAAGGACTTTATCGACAAATGGCGTATTATCAAAACTACCTCTGAAGACGCCATCAAACAGCTGGCGAAGCTGATGCTGACAGCCTGTACGGTAAATTCGCGTCGCAACCCGGACGTTTCCGTAAAGTGCCCATACCTGAAAGAGAAACGGTGCTCTGGGTTTTCGTCTAGGTGAGGAGGAAACGAAAGACCCTGTAATATACCCGATGGTGTCTTTTATCACGGCCTGGGCACGCTAGTACGACCAATCACAGCAGCGCAGGCTTGTTATGATCGTATTTCTACTGCGGATACCGATTCTATTCCACCTGACTGGTACTGAAATTCTGGAACGTTATCAAAGACATCGTAGACCCGAAGAAACTGGGCTACTGGGGCACCACGAATCCACTTTTAAGCGTGGCAAAATATCTGACGTCAGAAAACCTACATCCAGGATATTTACATGAAAGAAGTAGACGGCAACTGTAGAGGGCTCTTCCTGACGAACCTACACTGACATCAAGTTCTCTGTGAAATGCGCAGGCATGACGGACCAAAATCAAAAAGGAAGTGAACTTTTCGAAAACTTCAAAGTGGGTTTTCTCGTAAAATGAAACCGAAGCCTGTCAGGTACCGGGTGGCGTAGTGCTGGTTGATCGGACACTTTACTATCAATAACTCGAGCTGCAGAA
TTCCAAGCTTGGATTCCGGCTGCTAACAAAGCCCGAAAGGAAGCTGAGTTGGCTGCTGCACCGCTGAGCAATAACTCTATACATGACTCAT\t*\tRG:Z:a955def6\tbc:B:S,1,1\tbq:i:1\tcx:i:31\tnp:i:1\tqe:i:10277\tqs:i:8294\trq:f:0.88458\tsn:B:f,22.8448,13.8689,14.6461,14.3552\tzm:i:30422 (esc)
+ m140918_150013_42139_c100697631700000001823144703261565_s1_p0/30422/10327_12283\t4\t*\t0\t255\t*\t*\t0\t0\tAGAGTCATGTATAGAGTTATTGCTCAGCGGTGGCAGCACCAACTCAGCTTCCTTTCGGCTTTGTTAGCAGCCGATCCAAGCTTGAATTCCTGCAGCTCGGAGTTATTTGATAGTAAAAGTTGTCATCCAAACGCAGCACTACGCCCACCCGTACCTGAACAGGCTTTCGGTTTCATTTTACGAGAAAAACACTTTTGAAAGTTTTCGAAAGTCACTTCCTTTTTTGATTTTGTCCGTCATGCCTGCGCATTTCACAGAGAACTTGATGTCAGTGTAGTCGTCAGGAGAGCCCTCTACCAGTTTGCCGTCTACTTCTTTCATGTAAATATCCTGGAATGTAGGTTTTTCTGACGCAGATTATTTTGCACGCTTAAAAGTGGATTCGTGTGGCCCCAGTAGCCCAGTTTCTTCGGTCTACGATGTCTTTGATACGTCCAGAATTTCAGTAAACAGTCAGGTGAATAGAAATCCGGTATCGCAGTAGAATAATACGATCATAACAACCTGCGCTGCTGTGTGGTCGTATAGCGTGCCCAGGCCGTGATAACAGACACCTCGGGGTAATATACAGGGTCTTTCCGTTCCTCCTCAACCTAGACGAAACCCAGAGCACCGTTCTCTTTTCAGGTATGGCACTTTAACCGGTACGTCCGGGTTGGACGCGAATTTACCGTAGCAGGCTGTTCAGCATCAGCTTTCGCCAGCCTGTTTGATGGCGCTCTTCAGAGGTAGTTTGAATATACGTCCATTTGTCGAATAAAGTCCTTGGAACAGGCCCAGTGGTTGCTTTGAACTTCCAGACCAGAGATATATTTCAACATTGTACAGGTCGTAGTGCTCTTTCCACTCAGTTCCAGATCAACATTAAGACAGCCACAGGTCAGATTTCCCCGCCGGAAGATTCAGGTAATTCTAGTTACCCTTGTAGAAATGGCGACGCTTGATCTGGATGGTCGGGATCCTAGCTTCCCTTCAGTTCAAATTCACAACGAATGTTGCTGAATCTGTGCAGAGGGTAATCCTCGGTCCAGACGTATTTACCCTCGAATACGATGCTCGCCGTACGGCAGCAGCGAGAGTACATTTGAGCTGGTACAGGGAATTAACATCAAAAAACATACTTCGCCGATCTCTTTTTCTTTAAAACGGTCATTCAGCCAGGTGAAACCACCACGGTAGGCATAACGAAACTTCCTGTCCAGACCCAGGCTCAGGTCGGAAAACTTGTTAAACTTCTTGGTGGTGATAATGTCTTTGAAAGCCTTTCAGGAAGTCAGAACCATGCCGTCATCCGATCCAGACCCCTGCTTTGAACTGGAATCAGCAGAGGCTCTGCGATAATCGAATATCGTTTTTAAATGTAGGCATATTTTCTTCGGGGTGATTTGTAACGCGACCGGGCGCTCATTATGATAGTCGATGTCGCCTTTCAGTACCGTCAGCTTAAAGTCTTTCGCAATTTTCTTAACCGACGGCAGTTTCTTCAGAGAGGTCGTAGATCACGGTGTGGATCTTGCGTTTACCCTTGTAACCAGGCAAATATCAATCATATACCACTGGCCCATGCGAGAATGATGGTGTTGTAGGTATTTGGCAGACGCATCTGCGGACCATTTAAACCGTTACGTTCCAGCCAGTTGATGATGAATGCGCCCATCATTTCAGATTTGTGGAAGGTACAGGTCAGCCTGAACTTGTCAGAAACCCAAGCCATAAATTCATCCAGGGAGTACATCTTATAATCTCGAAGTGGTCTTCGATGTTCATGTAACCGTATGCCCATACGCGCAATCTTCACCTTGGTGGTAGTCTGCAGTCGCAGAATAATTTTACGTGGCATGTGTTTCATATGTTATTAGTCTCCTTCTTAAAGTTAAACAAAATTATTTTT
AGAAGGGGAATTGTTATCCGCTCACAATTCCCCTATAGTGGAGTCGTATTAATTTCGCGGGTATC\t*\tRG:Z:a955def6\tbc:B:S,1,1\tbq:i:1\tcx:i:31\tnp:i:1\tqe:i:12283\tqs:i:10327\trq:f:0.88458\tsn:B:f,22.8448,13.8689,14.6461,14.3552\tzm:i:30422 (esc)
+
+Invalid-Args:
+
+ $ $BAM2SAM --header-only --no-header < $DATADIR/phi29.bam
+
+ ERROR: conflicting arguments requested: --no-header and --header-only
+
+ Usage: bam2sam [options] [input]
+
+ bam2sam converts a BAM file to SAM. It is essentially a stripped-down 'samtools
+ view', mostly useful for testing/debugging without requiring samtools. Input BAM
+ file is read from a file or stdin, and SAM output is written to stdout.
+
+ Options:
+ -h, --help show this help message and exit
+ --version show program's version number and exit
+
+ Options:
+ input Input BAM file. If not provided, stdin will be used as input.
+ --no-header Omit header from output.
+ --header-only Print only the header (no records).
+ [1]
+
--- /dev/null
+Setup:
+
+ $ PBINDEXDUMP="@PacBioBAM_BinDir@/pbindexdump" && export PBINDEXDUMP
+
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
+
+Normal C++:
+
+ $ $PBINDEXDUMP --format=cpp $DATADIR/polymerase/production_hq.hqregion.bam.pbi
+ PbiRawData rawData;
+ rawData.Version(PbiFile::Version_3_0_1);
+ rawData.FileSections(PbiFile::BASIC);
+ rawData.NumReads(1);
+
+ PbiRawBasicData& basicData = rawData.BasicData();
+ basicData.rgId_ = {-898246524};
+ basicData.qStart_ = {2659};
+ basicData.qEnd_ = {7034};
+ basicData.holeNumber_ = {0};
+ basicData.readQual_ = {0.01};
+ basicData.ctxtFlag_ = {0};
+ basicData.fileOffset_ = {20054016};
+
+
+--(leave the blank lines above this)--
+
+Request C++, with JSON options (stdout includes usage/help, so we just want to check stderr):
+
+ $ $PBINDEXDUMP --format=cpp --json-indent-level=2 $DATADIR/polymerase/production_hq.hqregion.bam.pbi > /dev/null
+
+ ERROR: JSON formatting options not valid on non-JSON output
+
+ [1]
+
+ $ $PBINDEXDUMP --format=cpp --json-raw $DATADIR/polymerase/production_hq.hqregion.bam.pbi > /dev/null
+
+ ERROR: JSON formatting options not valid on non-JSON output
+
+ [1]
--- /dev/null
+Setup:
+
+ $ PBINDEXDUMP="@PacBioBAM_BinDir@/pbindexdump" && export PBINDEXDUMP
+
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
+
+Default settings (JSON):
+
+ $ $PBINDEXDUMP $DATADIR/polymerase/production_hq.hqregion.bam.pbi
+ {
+ "fileSections": [
+ "BasicData"
+ ],
+ "numReads": 1,
+ "reads": [
+ {
+ "contextFlag": 0,
+ "fileOffset": 20054016,
+ "holeNumber": 0,
+ "qEnd": 7034,
+ "qStart": 2659,
+ "readQuality": 0.00999999977648258,
+ "rgId": -898246524
+ }
+ ],
+ "version": "3.0.1"
+ }
+
+JSON indent level(2):
+
+ $ $PBINDEXDUMP --json-indent-level=2 $DATADIR/polymerase/production_hq.hqregion.bam.pbi
+ {
+ "fileSections": [
+ "BasicData"
+ ],
+ "numReads": 1,
+ "reads": [
+ {
+ "contextFlag": 0,
+ "fileOffset": 20054016,
+ "holeNumber": 0,
+ "qEnd": 7034,
+ "qStart": 2659,
+ "readQuality": 0.00999999977648258,
+ "rgId": -898246524
+ }
+ ],
+ "version": "3.0.1"
+ }
+
+JSON raw:
+
+ $ $PBINDEXDUMP --json-raw $DATADIR/polymerase/production_hq.hqregion.bam.pbi
+ {
+ "basicData": {
+ "ctxtFlag": [
+ 0
+ ],
+ "fileOffset": [
+ 20054016
+ ],
+ "holeNumber": [
+ 0
+ ],
+ "qEnd": [
+ 7034
+ ],
+ "qStart": [
+ 2659
+ ],
+ "readQual": [
+ 0.00999999977648258
+ ],
+ "rgId": [
+ -898246524
+ ]
+ },
+ "fileSections": [
+ "BasicData"
+ ],
+ "numReads": 1,
+ "version": "3.0.1"
+ }
--- /dev/null
+Setup:
+
+ $ TOOLS_BIN="@PacBioBAM_BinDir@" && export TOOLS_BIN
+ $ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
+ $ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
+
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
+ $ INPUT_1="$DATADIR/dataset/bam_mapping_1.bam" && export INPUT_1
+ $ INPUT_2="$DATADIR/dataset/bam_mapping_2.bam" && export INPUT_2
+
+ $ MERGED_BAM="@GeneratedTestDataDir@/aligned_ordering_merged.bam" && export MERGED_BAM
+ $ MERGED_BAM_PBI="@GeneratedTestDataDir@/aligned_ordering_merged.bam.pbi" && export MERGED_BAM_PBI
+
+Sanity Check:
+
+ $ $BAM2SAM --header-only $INPUT_1
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tDS:READTYPE=SUBREAD;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;InsertionQV=iq;DeletionQV=dq;SubstitutionQV=sq;MergeQV=mq;SubstitutionTag=st;DeletionTag=dt\tPL:PACBIO\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377 (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+
+ $ $BAM2SAM --no-header $INPUT_1 | cut -f 1,3,4 | head -n 10
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+
+ $ $BAM2SAM --header-only $INPUT_2
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tDS:READTYPE=SUBREAD;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;InsertionQV=iq;DeletionQV=dq;SubstitutionQV=sq;MergeQV=mq;SubstitutionTag=st;DeletionTag=dt\tPL:PACBIO\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377 (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+
+ $ $BAM2SAM --no-header $INPUT_2 | cut -f 1,3,4 | head -n 10
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+Normal Merge:
+
+ $ $PBMERGE $INPUT_1 $INPUT_2 > $MERGED_BAM
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+ $ rm $MERGED_BAM
+
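+Shuffle Input (NOTE: the command below passes INPUT_2 twice, not INPUT_2 followed by INPUT_1; the expected output lists only INPUT_2 records, each once):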
+
+ $ $PBMERGE $INPUT_2 $INPUT_2 > $MERGED_BAM
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7046_7293\tlambda_NEB3011\t5136 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/38025/6255_7894\tlambda_NEB3011\t5427 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5311_5508\tlambda_NEB3011\t5943 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/36363/899_1197\tlambda_NEB3011\t6258 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/36363/605_853\tlambda_NEB3011\t6312 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/31174/0_1029\tlambda_NEB3011\t6487 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/31174/1075_1271\tlambda_NEB3011\t6499 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/38025/5743_6211\tlambda_NEB3011\t6606 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/50257/6944_7361\tlambda_NEB3011\t6942 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/50257/6546_6903\tlambda_NEB3011\t7010 (esc)
+
+ $ rm $MERGED_BAM
+
+Explicit Output Filename (also enables PBI):
+
+ $ $PBMERGE -o $MERGED_BAM $INPUT_1 $INPUT_2
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Found
+
+ $ rm $MERGED_BAM
+ $ rm $MERGED_BAM_PBI
+
+Explicit Output Filename (with disabled PBI):
+
+ $ $PBMERGE -o $MERGED_BAM --no-pbi $INPUT_1 $INPUT_2
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7247/7338_7831\tlambda_NEB3011\t4904 (esc)
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Not found
+
+ $ rm $MERGED_BAM
--- /dev/null
+Setup:
+
+ $ TOOLS_BIN="@PacBioBAM_BinDir@" && export TOOLS_BIN
+ $ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
+ $ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
+
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
+ $ INPUT_XML="$DATADIR/polymerase/consolidate.subread.dataset.xml" && export INPUT_XML
+ $ BAM_1="$DATADIR/polymerase/production.subreads.bam" && export BAM_1
+ $ BAM_2="$DATADIR/polymerase/production.scraps.bam" && export BAM_2
+
+ $ MERGED_BAM="@GeneratedTestDataDir@/merged.bam" && export MERGED_BAM
+ $ MERGED_BAM_PBI="@GeneratedTestDataDir@/merged.bam.pbi" && export MERGED_BAM_PBI
+
+Sanity Check:
+
+ $ $BAM2SAM --no-header $BAM_1 | cut -f 1
+ ArminsFakeMovie/0/2659_3025
+ ArminsFakeMovie/0/3116_3628
+ ArminsFakeMovie/0/3722_4267
+ ArminsFakeMovie/0/4356_4864
+ ArminsFakeMovie/0/4960_5477
+ ArminsFakeMovie/0/5571_6087
+ ArminsFakeMovie/0/6199_6719
+ ArminsFakeMovie/0/6812_7034
+
+ $ $BAM2SAM --no-header $BAM_2 | cut -f 1
+ ArminsFakeMovie/0/0_2659
+ ArminsFakeMovie/0/3025_3047
+ ArminsFakeMovie/0/3047_3095
+ ArminsFakeMovie/0/3095_3116
+ ArminsFakeMovie/0/3628_3650
+ ArminsFakeMovie/0/3650_3700
+ ArminsFakeMovie/0/3700_3722
+ ArminsFakeMovie/0/4267_4289
+ ArminsFakeMovie/0/4289_4335
+ ArminsFakeMovie/0/4335_4356
+ ArminsFakeMovie/0/4864_4888
+ ArminsFakeMovie/0/4888_4939
+ ArminsFakeMovie/0/4939_4960
+ ArminsFakeMovie/0/5477_5498
+ ArminsFakeMovie/0/5498_5546
+ ArminsFakeMovie/0/5546_5571
+ ArminsFakeMovie/0/6087_6116
+ ArminsFakeMovie/0/6116_6173
+ ArminsFakeMovie/0/6173_6199
+ ArminsFakeMovie/0/6719_6740
+ ArminsFakeMovie/0/6740_6790
+ ArminsFakeMovie/0/6790_6812
+ ArminsFakeMovie/0/7034_7035
+
+Normal Merge from XML:
+
+ $ $PBMERGE -o $MERGED_BAM $INPUT_XML
+
+ $ [ -f $MERGED_BAM ] && echo "Found" || echo "Not found"
+ Found
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Found
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:8aaede36\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @PG\tID:BAZ_FORMAT\tVN:0.3.0 (esc)
+ @PG\tID:PPA-BAZ2BAM\tVN:0.1.0 (esc)
+ @PG\tID:PPA-BAZWRITER\tVN:0.2.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+ ArminsFakeMovie/0/4267_4289
+ ArminsFakeMovie/0/4289_4335
+ ArminsFakeMovie/0/4335_4356
+ ArminsFakeMovie/0/4356_4864
+ ArminsFakeMovie/0/4864_4888
+ ArminsFakeMovie/0/4888_4939
+ ArminsFakeMovie/0/4939_4960
+ ArminsFakeMovie/0/4960_5477
+
+ $ rm $MERGED_BAM
+ $ rm $MERGED_BAM_PBI
+
+Normal Merge from XML (disabled PBI):
+
+ $ $PBMERGE --no-pbi -o $MERGED_BAM $INPUT_XML
+
+ $ [ -f $MERGED_BAM ] && echo "Found" || echo "Not found"
+ Found
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Not found
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:8aaede36\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @PG\tID:BAZ_FORMAT\tVN:0.3.0 (esc)
+ @PG\tID:PPA-BAZ2BAM\tVN:0.1.0 (esc)
+ @PG\tID:PPA-BAZWRITER\tVN:0.2.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+ ArminsFakeMovie/0/4267_4289
+ ArminsFakeMovie/0/4289_4335
+ ArminsFakeMovie/0/4335_4356
+ ArminsFakeMovie/0/4356_4864
+ ArminsFakeMovie/0/4864_4888
+ ArminsFakeMovie/0/4888_4939
+ ArminsFakeMovie/0/4939_4960
+ ArminsFakeMovie/0/4960_5477
+
+ $ rm $MERGED_BAM
+
+Write to stdout (no -o given; output is captured via shell redirection):
+
+ $ $PBMERGE --no-pbi $INPUT_XML > $MERGED_BAM
+
+ $ [ -f $MERGED_BAM ] && echo "Found" || echo "Not found"
+ Found
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Not found
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:8aaede36\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:CodecV1=ip;BINDINGKIT=FakeBindKit;SEQUENCINGKIT=FakeSeqKit;BASECALLERVERSION=0.2.0;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @PG\tID:BAZ_FORMAT\tVN:0.3.0 (esc)
+ @PG\tID:PPA-BAZ2BAM\tVN:0.1.0 (esc)
+ @PG\tID:PPA-BAZWRITER\tVN:0.2.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+ ArminsFakeMovie/0/4267_4289
+ ArminsFakeMovie/0/4289_4335
+ ArminsFakeMovie/0/4335_4356
+ ArminsFakeMovie/0/4356_4864
+ ArminsFakeMovie/0/4864_4888
+ ArminsFakeMovie/0/4888_4939
+ ArminsFakeMovie/0/4939_4960
+ ArminsFakeMovie/0/4960_5477
+
+ $ rm $MERGED_BAM
--- /dev/null
+Setup:
+
+ $ TOOLS_BIN="@PacBioBAM_BinDir@" && export TOOLS_BIN
+ $ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
+ $ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
+
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
+ $ INPUT_FOFN="$DATADIR/merge.fofn" && export INPUT_FOFN
+ $ INPUT_1="$DATADIR/aligned.bam" && export INPUT_1
+ $ INPUT_2="$DATADIR/aligned2.bam" && export INPUT_2
+
+ $ MERGED_BAM="@GeneratedTestDataDir@/aligned_ordering_merged.bam" && export MERGED_BAM
+ $ MERGED_BAM_PBI="@GeneratedTestDataDir@/aligned_ordering_merged.bam.pbi" && export MERGED_BAM_PBI
+
+Sanity Check:
+
+ $ $BAM2SAM --header-only $INPUT_1
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.3 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:0d7b28fa\tPL:PACBIO\tDS:READTYPE=SUBREAD;BINDINGKIT=100-619-300;SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100\tPU:singleInsertion\tPM:SEQUEL (esc)
+ @PG\tID:bwa\tPN:bwa\tVN:0.7.10-r1017-dirty\tCL:bwa mem lambdaNEB.fa singleInsertion.fasta (esc)
+
+ $ $BAM2SAM --no-header $INPUT_1 | cut -f 1,3,4 | head -n 10
+ singleInsertion/100/0_49\tlambda_NEB3011\t5211 (esc)
+ singleInsertion/200/0_49\tlambda_NEB3011\t5211 (esc)
+ singleInsertion/100/0_111\tlambda_NEB3011\t9378 (esc)
+ singleInsertion/100/0_111\tlambda_NEB3011\t9378 (esc)
+
+ $ $BAM2SAM --header-only $INPUT_2
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.3 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:b89a4406\tDS:READTYPE=SUBREAD;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;FRAMERATEHZ=100;InsertionQV=iq;DeletionQV=dq;SubstitutionQV=sq;MergeQV=mq;SubstitutionTag=st;DeletionTag=dt\tPL:PACBIO\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+
+ $ $BAM2SAM --no-header $INPUT_2 | cut -f 1,3,4 | head -n 10
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+
+Normal Merge from FOFN:
+
+ $ $PBMERGE -o $MERGED_BAM $INPUT_FOFN
+
+ $ [ -f $MERGED_BAM ] && echo "Found" || echo "Not found"
+ Found
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Found
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.3 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:0d7b28fa\tPL:PACBIO\tDS:READTYPE=SUBREAD;BINDINGKIT=100-619-300;SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100\tPU:singleInsertion\tPM:SEQUEL (esc)
+ @RG\tID:b89a4406\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;FRAMERATEHZ=100\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+ @PG\tID:bwa\tPN:bwa\tVN:0.7.10-r1017-dirty\tCL:bwa mem lambdaNEB.fa singleInsertion.fasta (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ singleInsertion/100/0_49\tlambda_NEB3011\t5211 (esc)
+ singleInsertion/200/0_49\tlambda_NEB3011\t5211 (esc)
+ singleInsertion/100/0_111\tlambda_NEB3011\t9378 (esc)
+ singleInsertion/100/0_111\tlambda_NEB3011\t9378 (esc)
+
+ $ rm $MERGED_BAM
+ $ rm $MERGED_BAM_PBI
+
+Normal Merge from FOFN (disabled PBI):
+
+ $ $PBMERGE --no-pbi -o $MERGED_BAM $INPUT_FOFN
+
+ $ [ -f $MERGED_BAM ] && echo "Found" || echo "Not found"
+ Found
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Not found
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.3 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:0d7b28fa\tPL:PACBIO\tDS:READTYPE=SUBREAD;BINDINGKIT=100-619-300;SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100\tPU:singleInsertion\tPM:SEQUEL (esc)
+ @RG\tID:b89a4406\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;FRAMERATEHZ=100\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377\tPM:SEQUEL (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+ @PG\tID:bwa\tPN:bwa\tVN:0.7.10-r1017-dirty\tCL:bwa mem lambdaNEB.fa singleInsertion.fasta (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1,3,4 | head -n 20
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/387_1134\tlambda_NEB3011\t303 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ singleInsertion/100/0_49\tlambda_NEB3011\t5211 (esc)
+ singleInsertion/200/0_49\tlambda_NEB3011\t5211 (esc)
+ singleInsertion/100/0_111\tlambda_NEB3011\t9378 (esc)
+ singleInsertion/100/0_111\tlambda_NEB3011\t9378 (esc)
+
+ $ rm $MERGED_BAM
--- /dev/null
+Setup:
+
+ $ TOOLS_BIN="@PacBioBAM_BinDir@" && export TOOLS_BIN
+ $ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
+ $ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
+
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
+ $ UNALIGNED_BAM="$DATADIR/polymerase/internal.hqregions.bam" && export UNALIGNED_BAM
+ $ ALIGNED_BAM="$DATADIR/dataset/bam_mapping_1.bam" && export ALIGNED_BAM
+
+ $ MERGED_BAM="@GeneratedTestDataDir@/mixed_ordering_merged.bam" && export MERGED_BAM
+
+Sanity Check:
+
+ $ $BAM2SAM --header-only $UNALIGNED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie (esc)
+ @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+ @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+ @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+
+ $ $BAM2SAM --no-header $UNALIGNED_BAM | cut -f 1
+ ArminsFakeMovie/100000/2659_7034
+
+ $ $BAM2SAM --header-only $ALIGNED_BAM
+ @HD\tVN:1.3.1\tSO:coordinate\tpb:3.0.1 (esc)
+ @SQ\tSN:lambda_NEB3011\tLN:48502\tM5:a1319ff90e994c8190a4fe6569d0822a (esc)
+ @RG\tID:a9a22406c5\tDS:READTYPE=SUBREAD;BINDINGKIT=100356300;SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3;InsertionQV=iq;DeletionQV=dq;SubstitutionQV=sq;MergeQV=mq;SubstitutionTag=st;DeletionTag=dt\tPL:PACBIO\tPU:m140905_042212_sidney_c100564852550000001823085912221377_s1_X0\tSM:c100564852550000001823085912221377 (esc)
+ @PG\tID:BLASR\tVN:1.3.1.141565\tCL:/home/UNIXHOME/yli/for_the_people/blasr_bam_out/blasr m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 lambdaNEB.fa -out tmp.bam -bam -bestn 10 -minMatch 12 -nproc 8 -minSubreadLength 50 -minReadLength 50 -randomSeed 1 -clipping subread (esc)
+
+ $ $BAM2SAM --no-header $ALIGNED_BAM | cut -f 1,3,4 | head -n 10
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132\tlambda_NEB3011\t1 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/0_344\tlambda_NEB3011\t676 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/9936_10187\tlambda_NEB3011\t2171 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/6469/10232_10394\tlambda_NEB3011\t2204 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7468_8906\tlambda_NEB3011\t3573 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/5557_7235\tlambda_NEB3011\t4507 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/13473/7285_8657\tlambda_NEB3011\t4508 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/426_1045\tlambda_NEB3011\t4593 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/30983/7064_7421\tlambda_NEB3011\t4670 (esc)
+ m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/19915/0_382\tlambda_NEB3011\t4843 (esc)
+
+Normal Merge - should fail (input sort orders differ: SO:unknown vs. SO:coordinate):
+
+ $ $PBMERGE $UNALIGNED_BAM $ALIGNED_BAM > $MERGED_BAM
+ ERROR: BAM file sort orders do not match, aborting merge
+ [1]
+
+Shuffle Input - should fail the same way regardless of input order:
+
+ $ $PBMERGE $ALIGNED_BAM $UNALIGNED_BAM > $MERGED_BAM
+ ERROR: BAM file sort orders do not match, aborting merge
+ [1]
+
+Cleanup (the shell redirection creates the output file even though the merge aborts):
+
+ $ rm $MERGED_BAM
--- /dev/null
+Setup:
+
+ $ TOOLS_BIN="@PacBioBAM_BinDir@" && export TOOLS_BIN
+ $ PBMERGE="$TOOLS_BIN/pbmerge" && export PBMERGE
+ $ BAM2SAM="$TOOLS_BIN/bam2sam" && export BAM2SAM
+
+ $ DATADIR="@PacBioBAM_TestsDir@/data" && export DATADIR
+ $ HQREGION_BAM="$DATADIR/polymerase/internal.hqregions.bam" && export HQREGION_BAM
+ $ SCRAPS_BAM="$DATADIR/polymerase/internal.scraps.bam" && export SCRAPS_BAM
+
+ $ MERGED_BAM="@GeneratedTestDataDir@/pacbio_ordering_merged.bam" && export MERGED_BAM
+ $ MERGED_BAM_PBI="@GeneratedTestDataDir@/pacbio_ordering_merged.bam.pbi" && export MERGED_BAM_PBI
+
+Sanity Check:
+
+ $ $BAM2SAM --header-only $HQREGION_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie (esc)
+ @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+ @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+ @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+
+ $ $BAM2SAM --no-header $HQREGION_BAM | cut -f 1
+ ArminsFakeMovie/100000/2659_7034
+
+ $ $BAM2SAM --header-only $SCRAPS_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie (esc)
+ @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+ @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+ @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+
+ $ $BAM2SAM --no-header $SCRAPS_BAM | cut -f 1
+ ArminsFakeMovie/100000/0_2659
+ ArminsFakeMovie/100000/3025_3047
+ ArminsFakeMovie/100000/3047_3095
+ ArminsFakeMovie/100000/3095_3116
+ ArminsFakeMovie/100000/3628_3650
+ ArminsFakeMovie/100000/3650_3700
+ ArminsFakeMovie/100000/3700_3722
+ ArminsFakeMovie/100000/4267_4289
+ ArminsFakeMovie/100000/4289_4335
+ ArminsFakeMovie/100000/4335_4356
+ ArminsFakeMovie/100000/4864_4888
+ ArminsFakeMovie/100000/4888_4939
+ ArminsFakeMovie/100000/4939_4960
+ ArminsFakeMovie/100000/5477_5498
+ ArminsFakeMovie/100000/5498_5546
+ ArminsFakeMovie/100000/5546_5571
+ ArminsFakeMovie/100000/6087_6116
+ ArminsFakeMovie/100000/6116_6173
+ ArminsFakeMovie/100000/6173_6199
+ ArminsFakeMovie/100000/6719_6740
+ ArminsFakeMovie/100000/6740_6790
+ ArminsFakeMovie/100000/6790_6812
+ ArminsFakeMovie/100000/7034_7035
+ ArminsFakeMovie/200000/0_2659
+ ArminsFakeMovie/200000/3025_3047
+ ArminsFakeMovie/200000/3047_3095
+ ArminsFakeMovie/200000/3095_3116
+ ArminsFakeMovie/200000/3628_3650
+ ArminsFakeMovie/200000/3650_3700
+ ArminsFakeMovie/200000/3700_3722
+ ArminsFakeMovie/200000/4267_4289
+ ArminsFakeMovie/200000/4289_4335
+ ArminsFakeMovie/200000/4335_4356
+ ArminsFakeMovie/200000/4864_4888
+ ArminsFakeMovie/200000/4888_4939
+ ArminsFakeMovie/200000/4939_4960
+ ArminsFakeMovie/200000/5477_5498
+ ArminsFakeMovie/200000/5498_5546
+ ArminsFakeMovie/200000/5546_5571
+ ArminsFakeMovie/200000/6087_6116
+ ArminsFakeMovie/200000/6116_6173
+ ArminsFakeMovie/200000/6173_6199
+ ArminsFakeMovie/200000/6719_6740
+ ArminsFakeMovie/200000/6740_6790
+ ArminsFakeMovie/200000/6790_6812
+ ArminsFakeMovie/200000/7034_7035
+ ArminsFakeMovie/300000/0_2659
+ ArminsFakeMovie/300000/3025_3047
+ ArminsFakeMovie/300000/3047_3095
+ ArminsFakeMovie/300000/3095_3116
+ ArminsFakeMovie/300000/3628_3650
+ ArminsFakeMovie/300000/3650_3700
+ ArminsFakeMovie/300000/3700_3722
+ ArminsFakeMovie/300000/4267_4289
+ ArminsFakeMovie/300000/4289_4335
+ ArminsFakeMovie/300000/4335_4356
+ ArminsFakeMovie/300000/4864_4888
+ ArminsFakeMovie/300000/4888_4939
+ ArminsFakeMovie/300000/4939_4960
+ ArminsFakeMovie/300000/5477_5498
+ ArminsFakeMovie/300000/5498_5546
+ ArminsFakeMovie/300000/5546_5571
+ ArminsFakeMovie/300000/6087_6116
+ ArminsFakeMovie/300000/6116_6173
+ ArminsFakeMovie/300000/6173_6199
+ ArminsFakeMovie/300000/6719_6740
+ ArminsFakeMovie/300000/6740_6790
+ ArminsFakeMovie/300000/6790_6812
+ ArminsFakeMovie/300000/7034_7035
+
+Normal Merge:
+
+ $ $PBMERGE $HQREGION_BAM $SCRAPS_BAM > $MERGED_BAM
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+ @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+ @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+ ArminsFakeMovie/100000/0_2659
+ ArminsFakeMovie/100000/2659_7034
+ ArminsFakeMovie/100000/3025_3047
+ ArminsFakeMovie/100000/3047_3095
+ ArminsFakeMovie/100000/3095_3116
+ ArminsFakeMovie/100000/3628_3650
+ ArminsFakeMovie/100000/3650_3700
+ ArminsFakeMovie/100000/3700_3722
+ ArminsFakeMovie/100000/4267_4289
+ ArminsFakeMovie/100000/4289_4335
+ ArminsFakeMovie/100000/4335_4356
+ ArminsFakeMovie/100000/4864_4888
+ ArminsFakeMovie/100000/4888_4939
+ ArminsFakeMovie/100000/4939_4960
+ ArminsFakeMovie/100000/5477_5498
+ ArminsFakeMovie/100000/5498_5546
+ ArminsFakeMovie/100000/5546_5571
+ ArminsFakeMovie/100000/6087_6116
+ ArminsFakeMovie/100000/6116_6173
+ ArminsFakeMovie/100000/6173_6199
+ ArminsFakeMovie/100000/6719_6740
+ ArminsFakeMovie/100000/6740_6790
+ ArminsFakeMovie/100000/6790_6812
+ ArminsFakeMovie/100000/7034_7035
+ ArminsFakeMovie/200000/0_2659
+ ArminsFakeMovie/200000/3025_3047
+ ArminsFakeMovie/200000/3047_3095
+ ArminsFakeMovie/200000/3095_3116
+ ArminsFakeMovie/200000/3628_3650
+ ArminsFakeMovie/200000/3650_3700
+ ArminsFakeMovie/200000/3700_3722
+ ArminsFakeMovie/200000/4267_4289
+ ArminsFakeMovie/200000/4289_4335
+ ArminsFakeMovie/200000/4335_4356
+ ArminsFakeMovie/200000/4864_4888
+ ArminsFakeMovie/200000/4888_4939
+ ArminsFakeMovie/200000/4939_4960
+ ArminsFakeMovie/200000/5477_5498
+ ArminsFakeMovie/200000/5498_5546
+ ArminsFakeMovie/200000/5546_5571
+ ArminsFakeMovie/200000/6087_6116
+ ArminsFakeMovie/200000/6116_6173
+ ArminsFakeMovie/200000/6173_6199
+ ArminsFakeMovie/200000/6719_6740
+ ArminsFakeMovie/200000/6740_6790
+ ArminsFakeMovie/200000/6790_6812
+ ArminsFakeMovie/200000/7034_7035
+ ArminsFakeMovie/300000/0_2659
+ ArminsFakeMovie/300000/3025_3047
+ ArminsFakeMovie/300000/3047_3095
+ ArminsFakeMovie/300000/3095_3116
+ ArminsFakeMovie/300000/3628_3650
+ ArminsFakeMovie/300000/3650_3700
+ ArminsFakeMovie/300000/3700_3722
+ ArminsFakeMovie/300000/4267_4289
+ ArminsFakeMovie/300000/4289_4335
+ ArminsFakeMovie/300000/4335_4356
+ ArminsFakeMovie/300000/4864_4888
+ ArminsFakeMovie/300000/4888_4939
+ ArminsFakeMovie/300000/4939_4960
+ ArminsFakeMovie/300000/5477_5498
+ ArminsFakeMovie/300000/5498_5546
+ ArminsFakeMovie/300000/5546_5571
+ ArminsFakeMovie/300000/6087_6116
+ ArminsFakeMovie/300000/6116_6173
+ ArminsFakeMovie/300000/6173_6199
+ ArminsFakeMovie/300000/6719_6740
+ ArminsFakeMovie/300000/6740_6790
+ ArminsFakeMovie/300000/6790_6812
+ ArminsFakeMovie/300000/7034_7035
+
+ $ rm $MERGED_BAM
+
+Shuffle Input:
+
+ $ $PBMERGE $SCRAPS_BAM $HQREGION_BAM > $MERGED_BAM
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+ @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+ @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+ ArminsFakeMovie/100000/0_2659
+ ArminsFakeMovie/100000/2659_7034
+ ArminsFakeMovie/100000/3025_3047
+ ArminsFakeMovie/100000/3047_3095
+ ArminsFakeMovie/100000/3095_3116
+ ArminsFakeMovie/100000/3628_3650
+ ArminsFakeMovie/100000/3650_3700
+ ArminsFakeMovie/100000/3700_3722
+ ArminsFakeMovie/100000/4267_4289
+ ArminsFakeMovie/100000/4289_4335
+ ArminsFakeMovie/100000/4335_4356
+ ArminsFakeMovie/100000/4864_4888
+ ArminsFakeMovie/100000/4888_4939
+ ArminsFakeMovie/100000/4939_4960
+ ArminsFakeMovie/100000/5477_5498
+ ArminsFakeMovie/100000/5498_5546
+ ArminsFakeMovie/100000/5546_5571
+ ArminsFakeMovie/100000/6087_6116
+ ArminsFakeMovie/100000/6116_6173
+ ArminsFakeMovie/100000/6173_6199
+ ArminsFakeMovie/100000/6719_6740
+ ArminsFakeMovie/100000/6740_6790
+ ArminsFakeMovie/100000/6790_6812
+ ArminsFakeMovie/100000/7034_7035
+ ArminsFakeMovie/200000/0_2659
+ ArminsFakeMovie/200000/3025_3047
+ ArminsFakeMovie/200000/3047_3095
+ ArminsFakeMovie/200000/3095_3116
+ ArminsFakeMovie/200000/3628_3650
+ ArminsFakeMovie/200000/3650_3700
+ ArminsFakeMovie/200000/3700_3722
+ ArminsFakeMovie/200000/4267_4289
+ ArminsFakeMovie/200000/4289_4335
+ ArminsFakeMovie/200000/4335_4356
+ ArminsFakeMovie/200000/4864_4888
+ ArminsFakeMovie/200000/4888_4939
+ ArminsFakeMovie/200000/4939_4960
+ ArminsFakeMovie/200000/5477_5498
+ ArminsFakeMovie/200000/5498_5546
+ ArminsFakeMovie/200000/5546_5571
+ ArminsFakeMovie/200000/6087_6116
+ ArminsFakeMovie/200000/6116_6173
+ ArminsFakeMovie/200000/6173_6199
+ ArminsFakeMovie/200000/6719_6740
+ ArminsFakeMovie/200000/6740_6790
+ ArminsFakeMovie/200000/6790_6812
+ ArminsFakeMovie/200000/7034_7035
+ ArminsFakeMovie/300000/0_2659
+ ArminsFakeMovie/300000/3025_3047
+ ArminsFakeMovie/300000/3047_3095
+ ArminsFakeMovie/300000/3095_3116
+ ArminsFakeMovie/300000/3628_3650
+ ArminsFakeMovie/300000/3650_3700
+ ArminsFakeMovie/300000/3700_3722
+ ArminsFakeMovie/300000/4267_4289
+ ArminsFakeMovie/300000/4289_4335
+ ArminsFakeMovie/300000/4335_4356
+ ArminsFakeMovie/300000/4864_4888
+ ArminsFakeMovie/300000/4888_4939
+ ArminsFakeMovie/300000/4939_4960
+ ArminsFakeMovie/300000/5477_5498
+ ArminsFakeMovie/300000/5498_5546
+ ArminsFakeMovie/300000/5546_5571
+ ArminsFakeMovie/300000/6087_6116
+ ArminsFakeMovie/300000/6116_6173
+ ArminsFakeMovie/300000/6173_6199
+ ArminsFakeMovie/300000/6719_6740
+ ArminsFakeMovie/300000/6740_6790
+ ArminsFakeMovie/300000/6790_6812
+ ArminsFakeMovie/300000/7034_7035
+
+ $ rm $MERGED_BAM
+
+Explicit Output Filename (also enables PBI):
+
+ $ $PBMERGE -o $MERGED_BAM $HQREGION_BAM $SCRAPS_BAM
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+ @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+ @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+ ArminsFakeMovie/100000/0_2659
+ ArminsFakeMovie/100000/2659_7034
+ ArminsFakeMovie/100000/3025_3047
+ ArminsFakeMovie/100000/3047_3095
+ ArminsFakeMovie/100000/3095_3116
+ ArminsFakeMovie/100000/3628_3650
+ ArminsFakeMovie/100000/3650_3700
+ ArminsFakeMovie/100000/3700_3722
+ ArminsFakeMovie/100000/4267_4289
+ ArminsFakeMovie/100000/4289_4335
+ ArminsFakeMovie/100000/4335_4356
+ ArminsFakeMovie/100000/4864_4888
+ ArminsFakeMovie/100000/4888_4939
+ ArminsFakeMovie/100000/4939_4960
+ ArminsFakeMovie/100000/5477_5498
+ ArminsFakeMovie/100000/5498_5546
+ ArminsFakeMovie/100000/5546_5571
+ ArminsFakeMovie/100000/6087_6116
+ ArminsFakeMovie/100000/6116_6173
+ ArminsFakeMovie/100000/6173_6199
+ ArminsFakeMovie/100000/6719_6740
+ ArminsFakeMovie/100000/6740_6790
+ ArminsFakeMovie/100000/6790_6812
+ ArminsFakeMovie/100000/7034_7035
+ ArminsFakeMovie/200000/0_2659
+ ArminsFakeMovie/200000/3025_3047
+ ArminsFakeMovie/200000/3047_3095
+ ArminsFakeMovie/200000/3095_3116
+ ArminsFakeMovie/200000/3628_3650
+ ArminsFakeMovie/200000/3650_3700
+ ArminsFakeMovie/200000/3700_3722
+ ArminsFakeMovie/200000/4267_4289
+ ArminsFakeMovie/200000/4289_4335
+ ArminsFakeMovie/200000/4335_4356
+ ArminsFakeMovie/200000/4864_4888
+ ArminsFakeMovie/200000/4888_4939
+ ArminsFakeMovie/200000/4939_4960
+ ArminsFakeMovie/200000/5477_5498
+ ArminsFakeMovie/200000/5498_5546
+ ArminsFakeMovie/200000/5546_5571
+ ArminsFakeMovie/200000/6087_6116
+ ArminsFakeMovie/200000/6116_6173
+ ArminsFakeMovie/200000/6173_6199
+ ArminsFakeMovie/200000/6719_6740
+ ArminsFakeMovie/200000/6740_6790
+ ArminsFakeMovie/200000/6790_6812
+ ArminsFakeMovie/200000/7034_7035
+ ArminsFakeMovie/300000/0_2659
+ ArminsFakeMovie/300000/3025_3047
+ ArminsFakeMovie/300000/3047_3095
+ ArminsFakeMovie/300000/3095_3116
+ ArminsFakeMovie/300000/3628_3650
+ ArminsFakeMovie/300000/3650_3700
+ ArminsFakeMovie/300000/3700_3722
+ ArminsFakeMovie/300000/4267_4289
+ ArminsFakeMovie/300000/4289_4335
+ ArminsFakeMovie/300000/4335_4356
+ ArminsFakeMovie/300000/4864_4888
+ ArminsFakeMovie/300000/4888_4939
+ ArminsFakeMovie/300000/4939_4960
+ ArminsFakeMovie/300000/5477_5498
+ ArminsFakeMovie/300000/5498_5546
+ ArminsFakeMovie/300000/5546_5571
+ ArminsFakeMovie/300000/6087_6116
+ ArminsFakeMovie/300000/6116_6173
+ ArminsFakeMovie/300000/6173_6199
+ ArminsFakeMovie/300000/6719_6740
+ ArminsFakeMovie/300000/6740_6790
+ ArminsFakeMovie/300000/6790_6812
+ ArminsFakeMovie/300000/7034_7035
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Found
+
+ $ rm $MERGED_BAM
+ $ rm $MERGED_BAM_PBI
+
+Explicit Output Filename (with disabled PBI):
+
+ $ $PBMERGE -o $MERGED_BAM --no-pbi $HQREGION_BAM $SCRAPS_BAM
+
+ $ $BAM2SAM --header-only $MERGED_BAM
+ @HD\tVN:1.1\tSO:unknown\tpb:3.0.1 (esc)
+ @RG\tID:ca75d884\tPL:PACBIO\tDS:READTYPE=HQREGION;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\tPM:SEQUEL (esc)
+ @PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0 (esc)
+ @PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0 (esc)
+ @PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0 (esc)
+ @PG\tID:pbmerge-@PacBioBAM_VERSION@\tPN:pbmerge\tVN:@PacBioBAM_VERSION@ (esc)
+
+ $ $BAM2SAM --no-header $MERGED_BAM | cut -f 1
+ ArminsFakeMovie/100000/0_2659
+ ArminsFakeMovie/100000/2659_7034
+ ArminsFakeMovie/100000/3025_3047
+ ArminsFakeMovie/100000/3047_3095
+ ArminsFakeMovie/100000/3095_3116
+ ArminsFakeMovie/100000/3628_3650
+ ArminsFakeMovie/100000/3650_3700
+ ArminsFakeMovie/100000/3700_3722
+ ArminsFakeMovie/100000/4267_4289
+ ArminsFakeMovie/100000/4289_4335
+ ArminsFakeMovie/100000/4335_4356
+ ArminsFakeMovie/100000/4864_4888
+ ArminsFakeMovie/100000/4888_4939
+ ArminsFakeMovie/100000/4939_4960
+ ArminsFakeMovie/100000/5477_5498
+ ArminsFakeMovie/100000/5498_5546
+ ArminsFakeMovie/100000/5546_5571
+ ArminsFakeMovie/100000/6087_6116
+ ArminsFakeMovie/100000/6116_6173
+ ArminsFakeMovie/100000/6173_6199
+ ArminsFakeMovie/100000/6719_6740
+ ArminsFakeMovie/100000/6740_6790
+ ArminsFakeMovie/100000/6790_6812
+ ArminsFakeMovie/100000/7034_7035
+ ArminsFakeMovie/200000/0_2659
+ ArminsFakeMovie/200000/3025_3047
+ ArminsFakeMovie/200000/3047_3095
+ ArminsFakeMovie/200000/3095_3116
+ ArminsFakeMovie/200000/3628_3650
+ ArminsFakeMovie/200000/3650_3700
+ ArminsFakeMovie/200000/3700_3722
+ ArminsFakeMovie/200000/4267_4289
+ ArminsFakeMovie/200000/4289_4335
+ ArminsFakeMovie/200000/4335_4356
+ ArminsFakeMovie/200000/4864_4888
+ ArminsFakeMovie/200000/4888_4939
+ ArminsFakeMovie/200000/4939_4960
+ ArminsFakeMovie/200000/5477_5498
+ ArminsFakeMovie/200000/5498_5546
+ ArminsFakeMovie/200000/5546_5571
+ ArminsFakeMovie/200000/6087_6116
+ ArminsFakeMovie/200000/6116_6173
+ ArminsFakeMovie/200000/6173_6199
+ ArminsFakeMovie/200000/6719_6740
+ ArminsFakeMovie/200000/6740_6790
+ ArminsFakeMovie/200000/6790_6812
+ ArminsFakeMovie/200000/7034_7035
+ ArminsFakeMovie/300000/0_2659
+ ArminsFakeMovie/300000/3025_3047
+ ArminsFakeMovie/300000/3047_3095
+ ArminsFakeMovie/300000/3095_3116
+ ArminsFakeMovie/300000/3628_3650
+ ArminsFakeMovie/300000/3650_3700
+ ArminsFakeMovie/300000/3700_3722
+ ArminsFakeMovie/300000/4267_4289
+ ArminsFakeMovie/300000/4289_4335
+ ArminsFakeMovie/300000/4335_4356
+ ArminsFakeMovie/300000/4864_4888
+ ArminsFakeMovie/300000/4888_4939
+ ArminsFakeMovie/300000/4939_4960
+ ArminsFakeMovie/300000/5477_5498
+ ArminsFakeMovie/300000/5498_5546
+ ArminsFakeMovie/300000/5546_5571
+ ArminsFakeMovie/300000/6087_6116
+ ArminsFakeMovie/300000/6116_6173
+ ArminsFakeMovie/300000/6173_6199
+ ArminsFakeMovie/300000/6719_6740
+ ArminsFakeMovie/300000/6740_6790
+ ArminsFakeMovie/300000/6790_6812
+ ArminsFakeMovie/300000/7034_7035
+
+ $ [ -f $MERGED_BAM_PBI ] && echo "Found" || echo "Not found"
+ Not found
+
+ $ rm $MERGED_BAM
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+# Smoke test for the Python wrapper: import the module, construct one object
+# from it, and report the outcome on stdout.
+try:
+    import PacBioBam as bam
+    header = bam.BamHeader()
+    # The parenthesised single-argument form prints the same text under the
+    # Python 2 print statement and the Python 3 print function.
+    print("\nPython wrapper OK.\n")
+except ImportError:
+    # Only a failed import is reported here; any other exception propagates.
+    print("\nPython wrapper failed!\n")
+
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+# EMPTY ON PURPOSE.
+# This file just needs to exist for unit test discovery.
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+class TestData(object):
+    """Holds the location the Python wrapper tests read their fixtures from."""
+
+    def __init__(self):
+        # Main test data directory.
+        # NOTE(review): the @...@ token looks like a placeholder substituted
+        # when this template is configured -- confirm against the build scripts.
+        self.directory = "@PacBioBAM_TestsDir@/data"
+
\ No newline at end of file
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+import PacBioBam
+import config
+import unittest
+
+class AccuracyTest(unittest.TestCase):
+    """Checks the values PacBioBam.Accuracy reports for in- and out-of-range input."""
+
+    # ------------ SETUP --------------
+
+    def runTest(self):
+        # Lets the case be run when it is instantiated without a method name.
+        self.test_clamp()
+
+    # ------------ TESTS --------------
+
+    def test_clamp(self):
+        """Inputs below 0.0 read back as 0.0, inputs above 1.0 read back as 1.0."""
+        # (constructor argument, value expected back)
+        cases = (
+            (0.0, 0.0),     # zero
+            (-0.5, 0.0),    # negative input
+            (0.0, 0.0),     # minimum
+            (0.9, 0.9),     # ordinary value
+            (1.0, 1.0),     # maximum
+            (1.1, 1.0),     # too large
+        )
+
+        # Construct every Accuracy first, then run the comparisons.
+        built = [PacBioBam.Accuracy(given) for given, _ in cases]
+        for (_, wanted), accuracy in zip(cases, built):
+            self.assertAlmostEqual(float(wanted), float(accuracy))
+
\ No newline at end of file
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+import PacBioBam
+import config
+import unittest
+
+class BamFileTest(unittest.TestCase):
+    """Construction tests for PacBioBam.BamFile."""
+
+    # ------------ SETUP --------------
+
+    def setUp(self):
+        # Resolve the aligned BAM fixture inside the configured data directory.
+        self.data = config.TestData()
+        fixtureDir = self.data.directory
+        self.bamFn = fixtureDir + "/aligned.bam"
+
+    def runTest(self):
+        # Lets the case be run when it is instantiated without a method name.
+        for check in (self.test_ctor, self.test_nonExistentFile):
+            check()
+
+    # ------------ TESTS --------------
+
+    def test_ctor(self):
+        """Constructing a BamFile from the fixture path must not raise."""
+        opened = PacBioBam.BamFile(self.bamFn)
+
+    def test_nonExistentFile(self):
+        """Constructing a BamFile from a missing path raises RuntimeError."""
+        self.assertRaises(RuntimeError, PacBioBam.BamFile, "non_existent_file.bam")
+
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+import PacBioBam
+import config
+import unittest
+
+class BamHeaderTest(unittest.TestCase):
+    """Tests for PacBioBam.BamHeader: default state, decoding SAM text, encoding to SAM text."""
+
+    # ------------ SETUP --------------
+
+    def runTest(self):
+        # Lets the case be run when it is instantiated without a method name.
+        self.test_defaultCtor()
+        self.test_decode()
+        self.test_encode()
+
+    # ------------ TESTS --------------
+
+    def test_defaultCtor(self):
+        """A default-constructed header is empty and its lookups raise."""
+
+        header = PacBioBam.BamHeader()
+
+        self.assertFalse(header.Version())
+        self.assertFalse(header.SortOrder())
+        self.assertEqual(0, len(header.ReadGroups()))
+        self.assertEqual(0, len(header.Sequences()))
+        self.assertEqual(0, len(header.Programs()))
+        self.assertEqual(0, len(header.Comments()))
+
+        # Each lookup gets its own assertRaises. Inside one shared `with`
+        # block the first exception ends the block, so the remaining calls
+        # would never execute.
+        # NOTE(review): assumes the wrapper reports every failed lookup,
+        # including the index-based ones, as RuntimeError -- confirm against
+        # the wrapper's exception mapping.
+        self.assertRaises(RuntimeError, header.Program, "foo")
+        self.assertRaises(RuntimeError, header.ReadGroup, "foo")
+        self.assertRaises(RuntimeError, header.SequenceId, "foo")
+        self.assertRaises(RuntimeError, header.SequenceLength, 42)
+        self.assertRaises(RuntimeError, header.SequenceName, 42)
+
+    def test_decode(self):
+        """Builds a header from SAM text and checks every section through the accessors."""
+
+        text = ("@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
+                "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
+                "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
+                "@RG\tID:rg1\tSM:control\n"
+                "@RG\tID:rg2\tSM:condition1\n"
+                "@RG\tID:rg3\tSM:condition1\n"
+                "@PG\tID:_foo_\tPN:ide\n"
+                "@CO\tipsum and so on\n"
+                "@CO\tcitation needed\n")
+
+        header = PacBioBam.BamHeader(text)
+
+        # @HD line
+        self.assertEqual("1.1", header.Version())
+        self.assertEqual("queryname", header.SortOrder())
+        self.assertEqual("3.0.1", header.PacBioBamVersion())
+
+        # @RG lines
+        self.assertEqual(3, len(header.ReadGroups()))
+        self.assertTrue(header.HasReadGroup("rg1"))
+        self.assertTrue(header.HasReadGroup("rg2"))
+        self.assertTrue(header.HasReadGroup("rg3"))
+        self.assertEqual("control", header.ReadGroup("rg1").Sample())
+        self.assertEqual("condition1", header.ReadGroup("rg2").Sample())
+        self.assertEqual("condition1", header.ReadGroup("rg3").Sample())
+
+        # @SQ lines (lengths are compared as strings)
+        self.assertEqual(2, len(header.Sequences()))
+        self.assertTrue(header.HasSequence("chr1"))
+        self.assertTrue(header.HasSequence("chr2"))
+        self.assertEqual("chocobo", header.Sequence("chr1").Species())
+        self.assertEqual("chocobo", header.Sequence("chr2").Species())
+        self.assertEqual("2038", header.Sequence("chr1").Length())
+        self.assertEqual("3042", header.Sequence("chr2").Length())
+
+        # @PG line
+        self.assertEqual(1, len(header.Programs()))
+        self.assertTrue(header.HasProgram("_foo_"))
+        self.assertEqual("ide", header.Program("_foo_").Name())
+
+        # @CO lines
+        self.assertEqual(2, len(header.Comments()))
+        self.assertEqual("ipsum and so on", header.Comments()[0])
+        self.assertEqual("citation needed", header.Comments()[1])
+
+    def test_encode(self):
+        """Assembles a header piece by piece and compares ToSam() with the expected text."""
+
+        expectedText = ("@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
+                        "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
+                        "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
+                        "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
+                        "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
+                        "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
+                        "@PG\tID:_foo_\tPN:ide\n"
+                        "@CO\tipsum and so on\n"
+                        "@CO\tcitation needed\n")
+
+        # Read groups
+        rg1 = PacBioBam.ReadGroupInfo("rg1")
+        rg1.Sample("control")
+        rg2 = PacBioBam.ReadGroupInfo("rg2")
+        rg2.Sample("condition1")
+        rg3 = PacBioBam.ReadGroupInfo("rg3")
+        rg3.Sample("condition1")
+
+        # Reference sequences
+        seq1 = PacBioBam.SequenceInfo("chr1")
+        seq1.Length("2038")
+        seq1.Species("chocobo")
+        seq2 = PacBioBam.SequenceInfo("chr2")
+        seq2.Length("3042")
+        seq2.Species("chocobo")
+
+        # Program record
+        prog1 = PacBioBam.ProgramInfo("_foo_")
+        prog1.Name("ide")
+
+        header = PacBioBam.BamHeader()
+        header.Version("1.1")
+        header.SortOrder("queryname")
+        header.PacBioBamVersion("3.0.1")
+        header.AddReadGroup(rg1)
+        header.AddReadGroup(rg2)
+        header.AddReadGroup(rg3)
+        header.AddSequence(seq1)
+        header.AddSequence(seq2)
+        header.AddProgram(prog1)
+        header.AddComment("ipsum and so on")
+        header.AddComment("citation needed")
+
+        self.assertEqual(expectedText, header.ToSam())
+
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+import PacBioBam
+import config
+import unittest
+
+class CigarTest(unittest.TestCase):
+
+ # ------------ SETUP --------------
+
+ def runTest(self):
+ self.test_typeToChar()
+ self.test_charToType()
+ self.test_setType()
+ self.test_setChar()
+ self.test_cigarOpCtors()
+ self.test_fromEmptyString()
+ self.test_fromString()
+ self.test_toEmptyString()
+ self.test_toString()
+
+ # ------------ TESTS --------------
+
+ def test_typeToChar(self):
+ self.assertEqual('M', PacBioBam.CigarOperation.TypeToChar(PacBioBam.ALIGNMENT_MATCH))
+ self.assertEqual('I', PacBioBam.CigarOperation.TypeToChar(PacBioBam.INSERTION))
+ self.assertEqual('D', PacBioBam.CigarOperation.TypeToChar(PacBioBam.DELETION))
+ self.assertEqual('N', PacBioBam.CigarOperation.TypeToChar(PacBioBam.REFERENCE_SKIP))
+ self.assertEqual('S', PacBioBam.CigarOperation.TypeToChar(PacBioBam.SOFT_CLIP))
+ self.assertEqual('H', PacBioBam.CigarOperation.TypeToChar(PacBioBam.HARD_CLIP))
+ self.assertEqual('P', PacBioBam.CigarOperation.TypeToChar(PacBioBam.PADDING))
+ self.assertEqual('=', PacBioBam.CigarOperation.TypeToChar(PacBioBam.SEQUENCE_MATCH))
+ self.assertEqual('X', PacBioBam.CigarOperation.TypeToChar(PacBioBam.SEQUENCE_MISMATCH))
+
+ def test_charToType(self):
+ self.assertEqual(PacBioBam.ALIGNMENT_MATCH, PacBioBam.CigarOperation.CharToType('M'))
+ self.assertEqual(PacBioBam.INSERTION, PacBioBam.CigarOperation.CharToType('I'))
+ self.assertEqual(PacBioBam.DELETION, PacBioBam.CigarOperation.CharToType('D'))
+ self.assertEqual(PacBioBam.REFERENCE_SKIP, PacBioBam.CigarOperation.CharToType('N'))
+ self.assertEqual(PacBioBam.SOFT_CLIP, PacBioBam.CigarOperation.CharToType('S'))
+ self.assertEqual(PacBioBam.HARD_CLIP, PacBioBam.CigarOperation.CharToType('H'))
+ self.assertEqual(PacBioBam.PADDING, PacBioBam.CigarOperation.CharToType('P'))
+ self.assertEqual(PacBioBam.SEQUENCE_MATCH, PacBioBam.CigarOperation.CharToType('='))
+ self.assertEqual(PacBioBam.SEQUENCE_MISMATCH, PacBioBam.CigarOperation.CharToType('X'))
+
+ def test_setType(self):
+ m = PacBioBam.CigarOperation()
+ i = PacBioBam.CigarOperation()
+ d = PacBioBam.CigarOperation()
+ n = PacBioBam.CigarOperation()
+ s = PacBioBam.CigarOperation()
+ h = PacBioBam.CigarOperation()
+ p = PacBioBam.CigarOperation()
+ e = PacBioBam.CigarOperation()
+ x = PacBioBam.CigarOperation()
+
+ m.Type(PacBioBam.ALIGNMENT_MATCH)
+ i.Type(PacBioBam.INSERTION)
+ d.Type(PacBioBam.DELETION)
+ n.Type(PacBioBam.REFERENCE_SKIP)
+ s.Type(PacBioBam.SOFT_CLIP)
+ h.Type(PacBioBam.HARD_CLIP)
+ p.Type(PacBioBam.PADDING)
+ e.Type(PacBioBam.SEQUENCE_MATCH)
+ x.Type(PacBioBam.SEQUENCE_MISMATCH)
+
+ self.assertEqual('M', m.Char())
+ self.assertEqual('I', i.Char())
+ self.assertEqual('D', d.Char())
+ self.assertEqual('N', n.Char())
+ self.assertEqual('S', s.Char())
+ self.assertEqual('H', h.Char())
+ self.assertEqual('P', p.Char())
+ self.assertEqual('=', e.Char())
+ self.assertEqual('X', x.Char())
+
+ def test_setChar(self):
+ m = PacBioBam.CigarOperation()
+ i = PacBioBam.CigarOperation()
+ d = PacBioBam.CigarOperation()
+ n = PacBioBam.CigarOperation()
+ s = PacBioBam.CigarOperation()
+ h = PacBioBam.CigarOperation()
+ p = PacBioBam.CigarOperation()
+ e = PacBioBam.CigarOperation()
+ x = PacBioBam.CigarOperation()
+
+ m.Char('M')
+ i.Char('I')
+ d.Char('D')
+ n.Char('N')
+ s.Char('S')
+ h.Char('H')
+ p.Char('P')
+ e.Char('=')
+ x.Char('X')
+
+ self.assertEqual(PacBioBam.ALIGNMENT_MATCH, m.Type())
+ self.assertEqual(PacBioBam.INSERTION, i.Type())
+ self.assertEqual(PacBioBam.DELETION, d.Type())
+ self.assertEqual(PacBioBam.REFERENCE_SKIP, n.Type())
+ self.assertEqual(PacBioBam.SOFT_CLIP, s.Type())
+ self.assertEqual(PacBioBam.HARD_CLIP, h.Type())
+ self.assertEqual(PacBioBam.PADDING, p.Type())
+ self.assertEqual(PacBioBam.SEQUENCE_MATCH, e.Type())
+ self.assertEqual(PacBioBam.SEQUENCE_MISMATCH, x.Type())
+
+ def test_cigarOpCtors(self):
+ c1 = PacBioBam.CigarOperation('S', 10)
+ c2 = PacBioBam.CigarOperation(PacBioBam.SOFT_CLIP, 10)
+
+ self.assertEqual('S', c1.Char())
+ self.assertEqual('S', c2.Char())
+ self.assertEqual(PacBioBam.SOFT_CLIP, c1.Type())
+ self.assertEqual(PacBioBam.SOFT_CLIP, c2.Type())
+ self.assertEqual(10, c1.Length())
+ self.assertEqual(10, c2.Length())
+
+ def test_fromEmptyString(self):
+ s = ""
+ cigar = PacBioBam.Cigar(s)
+ self.assertEqual(0, len(cigar))
+
+    def test_fromString(self):
+        """Parsing CIGAR strings yields the expected operations, in order."""
+        singleCigar = PacBioBam.Cigar("100=")
+        multiCigar = PacBioBam.Cigar("100=2D34I6=6X6=")
+
+        # single-operation CIGAR
+        self.assertEqual(1, len(singleCigar))
+        onlyOp = singleCigar[0]
+        self.assertEqual('=', onlyOp.Char())
+        self.assertEqual(100, onlyOp.Length())
+
+        # multi-operation CIGAR: (character, length) expected at each index
+        expected = [
+            ('=', 100),
+            ('D', 2),
+            ('I', 34),
+            ('=', 6),
+            ('X', 6),
+            ('=', 6),
+        ]
+        self.assertEqual(6, len(multiCigar))
+        operations = [multiCigar[index] for index in range(6)]
+
+        # all characters are checked before any length
+        for operation, (opChar, _) in zip(operations, expected):
+            self.assertEqual(opChar, operation.Char())
+        for operation, (_, opLength) in zip(operations, expected):
+            self.assertEqual(opLength, operation.Length())
+
+    def test_toEmptyString(self):
+        """A default-constructed Cigar stringifies to a falsy (empty) value."""
+        emptyCigar = PacBioBam.Cigar()
+        text = emptyCigar.ToStdString()
+        self.assertFalse(text)
+
+    def test_toString(self):
+        """Cigars assembled operation-by-operation stringify to the expected text."""
+
+        def buildCigar(operations):
+            # append one CigarOperation per (type, length) pair, in order
+            cigar = PacBioBam.Cigar()
+            for opType, opLength in operations:
+                cigar.append(PacBioBam.CigarOperation(opType, opLength))
+            return cigar
+
+        singleCigar = buildCigar([
+            (PacBioBam.SEQUENCE_MATCH, 100),
+        ])
+        multiCigar = buildCigar([
+            (PacBioBam.SEQUENCE_MATCH, 100),
+            (PacBioBam.DELETION, 2),
+            (PacBioBam.INSERTION, 34),
+            (PacBioBam.SEQUENCE_MATCH, 6),
+            (PacBioBam.SEQUENCE_MISMATCH, 6),
+            (PacBioBam.SEQUENCE_MATCH, 6),
+        ])
+
+        self.assertEqual("100=", singleCigar.ToStdString())
+        self.assertEqual("100=2D34I6=6X6=", multiCigar.ToStdString())
+
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+import PacBioBam
+import config
+
+import os
+import unittest
+
+class EndToEndTest(unittest.TestCase):
+    """Round-trip test: copy every record of a BAM file and re-read the copy.
+
+    The record names read from the original file must equal, in order, the
+    record names read back from the generated copy.
+    """
+
+    def originalNames(self):
+        """Copy the input BAM to the generated BAM and return the record names.
+
+        Returns the list of FullName() values in file order. Fails the test,
+        reporting the underlying message, if PacBioBam raises RuntimeError.
+        """
+        # loop over original file, store names, write to generated file
+        try:
+            # named 'bamFile' so the builtin 'file' is not shadowed
+            bamFile = PacBioBam.BamFile(self.ex2BamFn)
+            writer = PacBioBam.BamWriter(self.generatedBamFn, bamFile.Header())
+
+            dataset = PacBioBam.DataSet(self.ex2BamFn)
+            entireFile = PacBioBam.EntireFileQuery(dataset)
+
+            names_in = []
+            for record in PacBioBam.Iterate(entireFile):
+                names_in.append(record.FullName())
+                writer.Write(record)
+            # NOTE(review): presumably the writer flushes/closes when it goes
+            # out of scope on return - confirm against the BamWriter binding.
+            return names_in
+
+        except RuntimeError as e:
+            # should not throw; keep the original error text in the failure
+            self.fail("copying records from %s raised RuntimeError: %s" % (self.ex2BamFn, e))
+
+    def generatedNames(self):
+        """Return the record names read back from the generated BAM file.
+
+        Fails the test, reporting the underlying message, if PacBioBam raises
+        RuntimeError.
+        """
+        try:
+            # open dataset on generated BAM file, read in names
+            dataset = PacBioBam.DataSet(self.generatedBamFn)
+            entireFile = PacBioBam.EntireFileQuery(dataset)
+            names_out = []
+            for record in PacBioBam.Iterate(entireFile):
+                names_out.append(record.FullName())
+            return names_out
+
+        except RuntimeError as e:
+            # should not throw; keep the original error text in the failure
+            self.fail("reading records from %s raised RuntimeError: %s" % (self.generatedBamFn, e))
+
+    def runTest(self):
+        """Run the round trip and always remove the generated file afterwards."""
+
+        self.testData = config.TestData()
+        self.ex2BamFn = self.testData.directory + "/aligned.bam"
+        self.generatedBamFn = self.testData.directory + "/generated.bam"
+
+        try:
+            # compare input records to generated copy's records
+            names_in = self.originalNames()
+            names_out = self.generatedNames()
+            self.assertEqual(names_in, names_out)
+        finally:
+            # clean up even when an assertion failed, so a failing run does
+            # not leave generated.bam behind in the test-data directory
+            if os.path.exists(self.generatedBamFn):
+                os.remove(self.generatedBamFn)
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+import PacBioBam
+import config
+import unittest
+
+class FramesTest(unittest.TestCase):
+    """Checks construction and encoding of PacBioBam.Frames."""
+
+    # ------------ SETUP --------------
+
+    def setUp(self):
+        # raw frame values handed to the Frames constructor
+        rawFrames = [
+            0, 8, 140, 0, 0, 7, 4, 0, 85, 2,
+            1, 3, 2, 10, 1, 20, 47, 10, 9, 60,
+            20, 3, 12, 5, 13, 165, 6, 14, 22, 12,
+            2, 4, 9, 218, 27, 3, 15, 2, 17, 2,
+            45, 24, 89, 10, 7, 1, 11, 15, 0, 7,
+            0, 28, 17, 12, 6, 10, 37, 0, 12, 52,
+            0, 7, 1, 14, 3, 26, 12, 0, 20, 17,
+            2, 13, 2, 9, 13, 7, 15, 29, 3, 6,
+            2, 1, 28, 10, 3, 14, 7, 1, 22, 1,
+            6, 6, 0, 19, 31, 6, 2, 14, 0, 0,
+            1000, 947, 948
+        ]
+
+        # values Frames.Encode() is expected to produce for rawFrames
+        encodedFrames = [
+            0, 8, 102, 0, 0, 7, 4, 0, 75, 2, 1, 3, 2,
+            10, 1, 20, 47, 10, 9, 60, 20, 3, 12, 5, 13, 115,
+            6, 14, 22, 12, 2, 4, 9, 135, 27, 3, 15, 2, 17,
+            2, 45, 24, 77, 10, 7, 1, 11, 15, 0, 7, 0, 28,
+            17, 12, 6, 10, 37, 0, 12, 52, 0, 7, 1, 14, 3,
+            26, 12, 0, 20, 17, 2, 13, 2, 9, 13, 7, 15, 29,
+            3, 6, 2, 1, 28, 10, 3, 14, 7, 1, 22, 1, 6,
+            6, 0, 19, 31, 6, 2, 14, 0, 0,
+            255, 254, 255
+        ]
+
+        self.testframes = rawFrames
+        self.encoded = encodedFrames
+
+    def runTest(self):
+        # run each case in a fixed order
+        for case in (self.test_ctors, self.test_encode):
+            case()
+
+    # ------------ TESTS --------------
+
+    def test_ctors(self):
+        """Default Frames are empty; list-constructed Frames hold the input values."""
+        emptyFrames = PacBioBam.Frames()
+        self.assertEqual(0, len(emptyFrames.Data()))
+
+        filledFrames = PacBioBam.Frames(self.testframes)
+        stored = filledFrames.Data()
+        self.assertEqual(len(self.testframes), len(stored))
+        # lengths match (asserted above), so pairing covers every element
+        for expected, actual in zip(self.testframes, stored):
+            self.assertEqual(int(expected), int(actual))
+
+    def test_encode(self):
+        """Encode() must produce the expected encoded value for every frame."""
+        frames = PacBioBam.Frames(self.testframes)
+        produced = frames.Encode()
+        self.assertEqual(len(self.encoded), len(produced))
+        # lengths match (asserted above), so pairing covers every element
+        for expected, actual in zip(self.encoded, produced):
+            self.assertEqual(int(expected), int(actual))
+
\ No newline at end of file
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+import PacBioBam
+import config
+import unittest
+
+class IntervalsTest(unittest.TestCase):
+    """Tests for PacBioBam.PositionInterval and the UnmappedPosition constant."""
+
+    # ------------ SETUP --------------
+
+    def runTest(self):
+        # run each case in a fixed order
+        cases = (
+            self.test_unmappedPosition,
+            self.test_ctors,
+            self.test_equality,
+            self.test_copy,
+            self.test_modifiers,
+            self.test_cover,
+            self.test_intersect,
+            self.test_validity,
+            self.test_length,
+        )
+        for case in cases:
+            case()
+
+    # ------------ TESTS --------------
+
+    def test_unmappedPosition(self):
+        """UnmappedPosition is exposed as -1."""
+        unmapped = PacBioBam.UnmappedPosition
+        self.assertEqual(-1, unmapped)
+
+    def test_ctors(self):
+        """Default, single-position and (start, stop) constructors set the endpoints."""
+        empty = PacBioBam.PositionInterval()
+        single = PacBioBam.PositionInterval(4)
+        normal = PacBioBam.PositionInterval(5, 8)
+
+        # (interval, expected start, expected stop)
+        expectations = (
+            (empty, 0, 0),
+            (single, 4, 5),
+            (normal, 5, 8),
+        )
+        for interval, start, stop in expectations:
+            self.assertEqual(start, interval.Start())
+            self.assertEqual(stop, interval.Stop())
+
+    def test_equality(self):
+        """Intervals compare equal exactly when their endpoints agree."""
+
+        empty = PacBioBam.PositionInterval()
+        empty2 = PacBioBam.PositionInterval()
+        singleton = PacBioBam.PositionInterval(4)
+        sameAsSingleton = PacBioBam.PositionInterval(4, 5)
+        normal = PacBioBam.PositionInterval(5, 8)
+        sameAsNormal = PacBioBam.PositionInterval(5, 8)
+        different = PacBioBam.PositionInterval(20, 40)
+
+        # self-equality
+        for interval in (empty, singleton, normal, different):
+            self.assertEqual(interval, interval)
+
+        # same values
+        equalPairs = (
+            (empty, empty2),
+            (singleton, sameAsSingleton),
+            (normal, sameAsNormal),
+        )
+        for left, right in equalPairs:
+            self.assertEqual(left, right)
+
+        # different values
+        unequalPairs = (
+            (empty, singleton),
+            (empty, normal),
+            (empty, different),
+            (singleton, normal),
+            (normal, different),
+        )
+        for left, right in unequalPairs:
+            self.assertNotEqual(left, right)
+
+    def test_copy(self):
+        """A copy-constructed interval and an alias both equal the source."""
+        source = PacBioBam.PositionInterval(5, 8)
+        copied = PacBioBam.PositionInterval(source)
+        alias = source  # plain Python rebinding, not a new object
+
+        for other in (source, copied, alias):
+            self.assertEqual(source, other)
+
+    def test_modifiers(self):
+        """Start()/Stop() setters change a copy without touching the source."""
+
+        source = PacBioBam.PositionInterval(5, 8)
+        modified = PacBioBam.PositionInterval(source)
+        modified.Start(2)
+        modified.Stop(10)
+
+        self.assertNotEqual(source, modified)
+        self.assertEqual(2, modified.Start())
+        self.assertEqual(10, modified.Stop())
+
+    def test_cover(self):
+        """Covers()/CoveredBy() on overlapping, nested and disjoint intervals."""
+
+        a = PacBioBam.PositionInterval(2, 4)
+        b = PacBioBam.PositionInterval(3, 5)
+        c = PacBioBam.PositionInterval(6, 8)
+        d = PacBioBam.PositionInterval(1, 7)
+        e = PacBioBam.PositionInterval(5, 8)
+
+        #     0123456789
+        #  a    --
+        #  b     --
+        #  c        --
+        #  d   ------
+        #  e       ---
+
+        expectTrue = self.assertTrue
+        expectFalse = self.assertFalse
+
+        # self-cover
+        expectTrue(a.Covers(a))
+        expectTrue(a.CoveredBy(a))
+
+        # basic covers/covered
+        expectTrue(b.CoveredBy(d))
+        expectTrue(d.Covers(b))
+        self.assertNotEqual(b, d)
+        expectFalse(b.Covers(d))
+
+        # completely disjoint
+        expectFalse(b.Covers(c))
+        expectFalse(c.Covers(b))
+        expectFalse(b.CoveredBy(c))
+        expectFalse(c.CoveredBy(b))
+
+        # b.stop == e.start
+        expectFalse(b.Covers(e))
+        expectFalse(b.CoveredBy(e))
+
+        # shared endpoint, start contained
+        expectTrue(e.Covers(c))
+        expectTrue(c.CoveredBy(e))
+
+    def test_intersect(self):
+        """Intersects() on overlapping, nested, adjacent and disjoint intervals."""
+
+        a = PacBioBam.PositionInterval(2, 4)
+        b = PacBioBam.PositionInterval(3, 5)
+        c = PacBioBam.PositionInterval(6, 8)
+        d = PacBioBam.PositionInterval(1, 7)
+        e = PacBioBam.PositionInterval(5, 8)
+
+        #     0123456789
+        #  a    --
+        #  b     --
+        #  c        --
+        #  d   ------
+        #  e       ---
+
+        expectTrue = self.assertTrue
+        expectFalse = self.assertFalse
+
+        # self-intersection
+        expectTrue(a.Intersects(a))
+
+        # intersection is commutative
+        expectTrue(a.Intersects(b))
+        expectTrue(b.Intersects(a))
+
+        # covered implies intersection
+        expectTrue(d.Covers(a))
+        expectTrue(a.Intersects(d))
+        expectTrue(d.Intersects(a))
+
+        # c.start > b.stop (obvious disjoint)
+        expectFalse(b.Intersects(c))
+
+        # b.stop == e.start (intervals are right-open, so disjoint)
+        expectFalse(b.Intersects(e))
+
+    def test_validity(self):
+        """IsValid() is true only when start < stop."""
+
+        # (interval, expected validity); all intervals are built before any check
+        cases = (
+            (PacBioBam.PositionInterval(), False),      # default ctor
+            (PacBioBam.PositionInterval(0, 0), False),  # start == stop (zero)
+            (PacBioBam.PositionInterval(4, 4), False),  # start == stop (nonzero)
+            (PacBioBam.PositionInterval(0, 1), True),   # start < stop (start is zero)
+            (PacBioBam.PositionInterval(4, 5), True),   # start < stop (start is nonzero)
+            (PacBioBam.PositionInterval(5, 4), False),  # start > stop
+        )
+
+        for interval, shouldBeValid in cases:
+            if shouldBeValid:
+                self.assertTrue(interval.IsValid())
+            else:
+                self.assertFalse(interval.IsValid())
+
+    def test_length(self):
+        """Length() equals stop - start for each sample interval."""
+
+        # (interval, expected length); all intervals are built before any check
+        cases = [
+            (PacBioBam.PositionInterval(2, 4), 2),
+            (PacBioBam.PositionInterval(3, 5), 2),
+            (PacBioBam.PositionInterval(6, 8), 2),
+            (PacBioBam.PositionInterval(1, 7), 6),
+            (PacBioBam.PositionInterval(5, 8), 3),
+        ]
+
+        for interval, expectedLength in cases:
+            self.assertEqual(expectedLength, interval.Length())
+
+class GenomicIntervalsTest(unittest.TestCase):
+    """Tests for PacBioBam.GenomicInterval (a named reference plus an interval)."""
+
+    # ------------ SETUP --------------
+
+    def runTest(self):
+        # run each case in a fixed order
+        cases = (
+            self.test_ctors,
+            self.test_copy,
+            self.test_modifiers,
+            self.test_cover,
+            self.test_validity,
+        )
+        for case in cases:
+            case()
+
+    # ------------ TESTS --------------
+
+    def test_ctors(self):
+        """Default and (name, start, stop) constructors set name and endpoints."""
+
+        empty = PacBioBam.GenomicInterval()
+        normal = PacBioBam.GenomicInterval("foo", 100, 200)
+
+        # (interval, expected name, expected start, expected stop)
+        expectations = (
+            (empty, "", 0, 0),
+            (normal, "foo", 100, 200),
+        )
+        for interval, name, start, stop in expectations:
+            self.assertEqual(name, interval.Name())
+            self.assertEqual(start, interval.Start())
+            self.assertEqual(stop, interval.Stop())
+
+    def test_copy(self):
+        """A copy-constructed interval and an alias both equal the source."""
+
+        source = PacBioBam.GenomicInterval("foo", 10, 20)
+        copied = PacBioBam.GenomicInterval(source)
+        alias = source  # plain Python rebinding, not a new object
+
+        for other in (source, copied, alias):
+            self.assertEqual(source, other)
+
+    def test_modifiers(self):
+        """Name/Start/Stop/Interval setters change copies without touching the source."""
+
+        source = PacBioBam.GenomicInterval("foo", 10, 20)
+
+        # the setters are chained on purpose: each call is used as the receiver of the next
+        renamed = PacBioBam.GenomicInterval(source)
+        renamed.Name("bar").Start(2).Stop(10)
+
+        # takes over only the interval part of 'renamed'
+        reinterval = PacBioBam.GenomicInterval(source)
+        reinterval.Interval(renamed.Interval())
+
+        self.assertNotEqual(source, renamed)
+        self.assertEqual("bar", renamed.Name())
+        self.assertEqual(2, renamed.Start())
+        self.assertEqual(10, renamed.Stop())
+        self.assertEqual(source.Name(), reinterval.Name())
+        self.assertEqual(renamed.Interval(), reinterval.Interval())
+
+    def test_cover(self):
+        """Covers()/CoveredBy() respect both the coordinates and the reference name."""
+
+        a = PacBioBam.GenomicInterval("foo", 2, 4)
+        b = PacBioBam.GenomicInterval("foo", 3, 5)
+        c = PacBioBam.GenomicInterval("foo", 6, 8)
+        d = PacBioBam.GenomicInterval("foo", 1, 7)
+        e = PacBioBam.GenomicInterval("foo", 5, 8)
+        f = PacBioBam.GenomicInterval("bar", 3, 5)  # same as b, different ref
+
+        #     0123456789
+        #  a    --
+        #  b     --
+        #  c        --
+        #  d   ------
+        #  e       ---
+
+        expectTrue = self.assertTrue
+        expectFalse = self.assertFalse
+
+        # self-cover
+        expectTrue(a.Covers(a))
+        expectTrue(a.CoveredBy(a))
+
+        # basic covers/covered
+        expectTrue(b.CoveredBy(d))
+        expectTrue(d.Covers(b))
+        self.assertNotEqual(b, d)
+        expectFalse(b.Covers(d))
+
+        # same coords as b, but different ref
+        expectFalse(f.CoveredBy(d))
+        expectFalse(d.Covers(f))
+        self.assertNotEqual(f, d)
+        expectFalse(f.Covers(d))
+
+        # obvious disjoint
+        expectFalse(b.Covers(c))
+        expectFalse(c.Covers(b))
+        expectFalse(b.CoveredBy(c))
+        expectFalse(c.CoveredBy(b))
+
+        # b.stop == e.start (intervals are right-open, so disjoint)
+        expectFalse(b.Covers(e))
+        expectFalse(b.CoveredBy(e))
+
+        # shared endpoint, start contained
+        expectTrue(e.Covers(c))
+        expectTrue(c.CoveredBy(e))
+
+    def test_validity(self):
+        """IsValid() needs a non-empty name and start < stop."""
+
+        a = PacBioBam.GenomicInterval()             # default
+        b = PacBioBam.GenomicInterval("foo", 0, 0)  # valid id, start == stop (zero)
+        c = PacBioBam.GenomicInterval("foo", 4, 4)  # valid id, start == stop (non-zero)
+        d = PacBioBam.GenomicInterval("foo", 0, 1)  # valid id, start < stop (start == zero) OK
+        e = PacBioBam.GenomicInterval("foo", 4, 5)  # valid id, start < stop (start > zero) OK
+        f = PacBioBam.GenomicInterval("foo", 5, 4)  # valid id, start > stop
+        g = PacBioBam.GenomicInterval("", 0, 0)     # invalid id, start == stop (zero)
+        h = PacBioBam.GenomicInterval("", 4, 4)     # invalid id, start == stop (non-zero)
+        i = PacBioBam.GenomicInterval("", 0, 1)     # invalid id, start < stop (start == zero)
+        j = PacBioBam.GenomicInterval("", 4, 5)     # invalid id, start < stop (start > zero)
+        k = PacBioBam.GenomicInterval("", 5, 4)     # invalid id, start > stop
+
+        # only the two intervals marked OK above are valid
+        for interval in (d, e):
+            self.assertTrue(interval.IsValid())
+
+        for interval in (a, b, c, f, g, h, i, j, k):
+            self.assertFalse(interval.IsValid())
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+import PacBioBam
+import config
+import unittest
+
+class PolymeraseStitchingTest(unittest.TestCase):
+
+ # ------------ SETUP --------------
+
+    def setUp(self):
+        """Load the test-data configuration whose directory holds the polymerase BAM files."""
+        testData = config.TestData()
+        self.data = testData
+
+    def runTest(self):
+        """Run every stitching case in a fixed order."""
+        cases = (
+            self.test_virtualRegions,
+            self.test_internalSubreadsToOriginal,
+            self.test_internalHqToOriginal,
+            self.test_productionSubreadsToOriginal,
+            self.test_productionHqToOriginal,
+        )
+        for case in cases:
+            case()
+
+ # ------------ TESTS --------------
+
+    def test_virtualRegions(self):
+        """Check the per-type region tables of the first stitched polymerase record.
+
+        Stitches the internal subreads + scraps BAMs and verifies the
+        (beginPos, endPos) of every ADAPTER, BARCODE, HQREGION and LQREGION
+        entry, plus the number of SUBREAD entries.
+        """
+
+        subreadBam = self.data.directory + "/polymerase/internal.subreads.bam"
+        scrapsBam = self.data.directory + "/polymerase/internal.scraps.bam"
+        vpr = PacBioBam.VirtualPolymeraseReader(subreadBam, scrapsBam)
+
+        # make sure a record is available before fetching it, as the other
+        # stitching tests in this class do
+        self.assertTrue(vpr.HasNext())
+        virtualRecord = vpr.Next()
+
+        # NOTE: this method is disabled
+        #
+        # Any attempt to retrieve this value resulted in several
+        # "swig/python detected a memory leak of type 'unknown', no destructor found."
+        # errors (& an empty dictionary result). The same info is available via the
+        # VirtualRegionsTable(regionType) method, though a bit clunkier if you just want
+        # to iterate. But access to region info for specific types are available & correct,
+        # so I'm just going to leave this one out for now. - DB
+        #
+        # regionMap = virtualRecord.VirtualRegionsMap();
+
+        def checkRegions(regionType, expected):
+            # fetch the table for one region type, then verify its size and
+            # the (beginPos, endPos) of each entry, in order
+            regions = virtualRecord.VirtualRegionsTable(regionType)
+            self.assertEqual(len(expected), len(regions))
+            for index, (beginPos, endPos) in enumerate(expected):
+                self.assertEqual(beginPos, regions[index].beginPos)
+                self.assertEqual(endPos, regions[index].endPos)
+
+        # ADAPTER
+        checkRegions(PacBioBam.VirtualRegionType_ADAPTER, [
+            (3047, 3095),
+            (3650, 3700),
+            (4289, 4335),
+            (4888, 4939),
+            (5498, 5546),
+            (6116, 6173),
+            (6740, 6790),
+        ])
+
+        # BARCODE
+        checkRegions(PacBioBam.VirtualRegionType_BARCODE, [
+            (3025, 3047),
+            (3095, 3116),
+            (3628, 3650),
+            (3700, 3722),
+            (4267, 4289),
+            (4335, 4356),
+            (4864, 4888),
+            (4939, 4960),
+            (5477, 5498),
+            (5546, 5571),
+            (6087, 6116),
+            (6173, 6199),
+            (6719, 6740),
+            (6790, 6812),
+        ])
+
+        # HQREGION
+        checkRegions(PacBioBam.VirtualRegionType_HQREGION, [
+            (2659, 7034),
+        ])
+
+        # LQREGION
+        checkRegions(PacBioBam.VirtualRegionType_LQREGION, [
+            (0, 2659),
+            (7034, 7035),
+        ])
+
+        # SUBREAD (only the number of entries is checked)
+        subread = virtualRecord.VirtualRegionsTable(PacBioBam.VirtualRegionType_SUBREAD)
+        self.assertEqual(8, len(subread))
+
+    def test_internalSubreadsToOriginal(self):
+        """Stitch internal subreads + scraps and compare with the original polymerase record."""
+
+        # stitch virtual polymerase record from its two source BAMs
+        stitchSources = (
+            self.data.directory + "/polymerase/internal.subreads.bam",
+            self.data.directory + "/polymerase/internal.scraps.bam",
+        )
+        reader = PacBioBam.VirtualPolymeraseReader(*stitchSources)
+        self.assertTrue(reader.HasNext())
+        stitched = reader.Next()
+
+        # fetch the first record of the original polymerase BAM;
+        # dataset, query and iterators stay bound to locals while the record is in use
+        originalDataset = PacBioBam.DataSet(self.data.directory + "/polymerase/internal.polymerase.bam")
+        originalQuery = PacBioBam.EntireFileQuery(originalDataset)
+        current = originalQuery.begin()
+        sentinel = originalQuery.end()
+        self.assertTrue(current != sentinel)
+        original = current.value()
+
+        # compare
+        self.compare(original, stitched)
+
+    def test_internalHqToOriginal(self):
+        """Stitch internal HQ + LQ regions and compare with the original polymerase record."""
+
+        # stitch virtual polymerase record from its two source BAMs
+        stitchSources = (
+            self.data.directory + "/polymerase/internal.hqregions.bam",
+            self.data.directory + "/polymerase/internal.lqregions.bam",
+        )
+        reader = PacBioBam.VirtualPolymeraseReader(*stitchSources)
+        self.assertTrue(reader.HasNext())
+        stitched = reader.Next()
+
+        # fetch the first record of the original polymerase BAM;
+        # dataset, query and iterators stay bound to locals while the record is in use
+        originalDataset = PacBioBam.DataSet(self.data.directory + "/polymerase/internal.polymerase.bam")
+        originalQuery = PacBioBam.EntireFileQuery(originalDataset)
+        current = originalQuery.begin()
+        sentinel = originalQuery.end()
+        self.assertTrue(current != sentinel)
+        original = current.value()
+
+        # compare
+        self.compare(original, stitched)
+
+    def test_productionSubreadsToOriginal(self):
+        """Stitch production subreads + scraps and compare with the original polymerase record."""
+
+        # stitch virtual polymerase record from its two source BAMs
+        stitchSources = (
+            self.data.directory + "/polymerase/production.subreads.bam",
+            self.data.directory + "/polymerase/production.scraps.bam",
+        )
+        reader = PacBioBam.VirtualPolymeraseReader(*stitchSources)
+        self.assertTrue(reader.HasNext())
+        stitched = reader.Next()
+
+        # fetch the first record of the original polymerase BAM;
+        # dataset, query and iterators stay bound to locals while the record is in use
+        originalDataset = PacBioBam.DataSet(self.data.directory + "/polymerase/production.polymerase.bam")
+        originalQuery = PacBioBam.EntireFileQuery(originalDataset)
+        current = originalQuery.begin()
+        sentinel = originalQuery.end()
+        self.assertTrue(current != sentinel)
+        original = current.value()
+
+        # compare: accessors that must return equal values on both records
+        for accessor in ("FullName", "HoleNumber", "NumPasses", "Sequence",
+                         "DeletionTag", "SubstitutionTag"):
+            self.assertEqual(getattr(original, accessor)(), getattr(stitched, accessor)())
+
+        # the original's IPD() is compared against the stitched record's IPDV1Frames()
+        self.assertEqual(original.IPD(), stitched.IPDV1Frames())
+        self.assertEqual(original.ReadGroup(), stitched.ReadGroup())
+
+        self.assertAlmostEqual(float(original.ReadAccuracy()), float(stitched.ReadAccuracy()))
+
+        # quality-value accessors are compared through their FASTQ strings
+        for accessor in ("Qualities", "DeletionQV", "InsertionQV", "MergeQV", "SubstitutionQV"):
+            self.assertEqual(getattr(original, accessor)().Fastq(),
+                             getattr(stitched, accessor)().Fastq())
+
+    def test_productionHqToOriginal(self):
+        """Stitch production HQ region + scraps and compare with the original polymerase record."""
+
+        # stitch virtual polymerase record from its two source BAMs
+        stitchSources = (
+            self.data.directory + "/polymerase/production_hq.hqregion.bam",
+            self.data.directory + "/polymerase/production_hq.scraps.bam",
+        )
+        reader = PacBioBam.VirtualPolymeraseReader(*stitchSources)
+        self.assertTrue(reader.HasNext())
+        stitched = reader.Next()
+
+        # fetch the first record of the original polymerase BAM;
+        # dataset, query and iterators stay bound to locals while the record is in use
+        originalDataset = PacBioBam.DataSet(self.data.directory + "/polymerase/production.polymerase.bam")
+        originalQuery = PacBioBam.EntireFileQuery(originalDataset)
+        current = originalQuery.begin()
+        sentinel = originalQuery.end()
+        self.assertTrue(current != sentinel)
+        original = current.value()
+
+        # compare: neither record is expected to carry pulse calls
+        for record in (original, stitched):
+            self.assertFalse(record.HasPulseCall())
+
+        # accessors that must return equal values on both records
+        for accessor in ("FullName", "HoleNumber", "NumPasses", "Sequence",
+                         "DeletionTag", "SubstitutionTag"):
+            self.assertEqual(getattr(original, accessor)(), getattr(stitched, accessor)())
+
+        # the original's IPD() is compared against the stitched record's IPDV1Frames()
+        self.assertEqual(original.IPD(), stitched.IPDV1Frames())
+        self.assertEqual(original.ReadGroup(), stitched.ReadGroup())
+
+        self.assertAlmostEqual(float(original.ReadAccuracy()), float(stitched.ReadAccuracy()))
+
+        # quality-value accessors are compared through their FASTQ strings
+        for accessor in ("Qualities", "DeletionQV", "InsertionQV", "MergeQV", "SubstitutionQV"):
+            self.assertEqual(getattr(original, accessor)().Fastq(),
+                             getattr(stitched, accessor)().Fastq())
+
+        # presence flags expected on both records: first the original, then the stitched one
+        presentFlags = (
+            "HasDeletionQV", "HasDeletionTag", "HasInsertionQV", "HasMergeQV",
+            "HasSubstitutionQV", "HasSubstitutionTag", "HasIPD",
+        )
+        absentFlags = (
+            "HasLabelQV", "HasAltLabelQV", "HasAltLabelTag", "HasPkmean", "HasPkmid",
+            "HasPulseCall", "HasPulseWidth", "HasPrePulseFrames", "HasPulseCallWidth",
+        )
+        for record in (original, stitched):
+            for flag in presentFlags:
+                self.assertTrue(getattr(record, flag)())
+            for flag in absentFlags:
+                self.assertFalse(getattr(record, flag)())
+
+ # ------------ HELPERS --------------
+
+ def compare(self, b1, b2):
+
+ self.assertTrue(b1.HasDeletionQV());
+ self.assertTrue(b1.HasDeletionTag());
+ self.assertTrue(b1.HasInsertionQV());
+ self.assertTrue(b1.HasMergeQV());
+ self.assertTrue(b1.HasSubstitutionQV());
+ self.assertTrue(b1.HasSubstitutionTag());
+ self.assertTrue(b1.HasLabelQV());
+ self.assertTrue(b1.HasAltLabelQV());
+ self.assertTrue(b1.HasAltLabelTag());
+ self.assertTrue(b1.HasPkmean());
+ self.assertTrue(b1.HasPkmid());
+ self.assertTrue(b1.HasPulseCall());
+ self.assertTrue(b1.HasIPD());
+ self.assertTrue(b1.HasPulseWidth());
+ self.assertTrue(b1.HasPrePulseFrames());
+ self.assertTrue(b1.HasPulseCallWidth());
+ self.assertTrue(b1.HasPulseMergeQV());
+
+ self.assertTrue(b2.HasDeletionQV());
+ self.assertTrue(b2.HasDeletionTag());
+ self.assertTrue(b2.HasInsertionQV());
+ self.assertTrue(b2.HasMergeQV());
+ self.assertTrue(b2.HasSubstitutionQV());
+ self.assertTrue(b2.HasSubstitutionTag());
+ self.assertTrue(b2.HasLabelQV());
+ self.assertTrue(b2.HasAltLabelQV());
+ self.assertTrue(b2.HasAltLabelTag());
+ self.assertTrue(b2.HasPkmean());
+ self.assertTrue(b2.HasPkmid());
+ self.assertTrue(b2.HasPulseCall());
+ self.assertTrue(b2.HasIPD());
+ self.assertTrue(b2.HasPulseWidth());
+ self.assertTrue(b2.HasPrePulseFrames());
+ self.assertTrue(b2.HasPulseCallWidth());
+ self.assertTrue(b2.HasPulseMergeQV());
+
+ self.assertEqual(b1.FullName(), b2.FullName());
+ self.assertEqual(b1.HoleNumber(), b2.HoleNumber());
+ self.assertEqual(b1.NumPasses(), b2.NumPasses());
+ self.assertEqual(b1.Sequence(), b2.Sequence());
+ self.assertEqual(b1.DeletionTag(), b2.DeletionTag());
+ self.assertEqual(b1.SubstitutionTag(), b2.SubstitutionTag());
+ self.assertEqual(b1.AltLabelTag(), b2.AltLabelTag());
+ self.assertEqual(b1.Pkmean(), b2.Pkmean());
+ self.assertEqual(b1.Pkmid(), b2.Pkmid());
+ self.assertEqual(b1.PulseCall(), b2.PulseCall());
+ self.assertEqual(b1.IPD(), b2.IPD());
+ self.assertEqual(b1.PulseWidth(), b2.PulseWidth());
+ self.assertEqual(b1.PrePulseFrames(), b2.PrePulseFrames());
+ self.assertEqual(b1.PulseCallWidth(), b2.PulseCallWidth());
+ self.assertEqual(b1.ReadGroup(), b2.ReadGroup());
+
+ self.assertEqual(b1.Qualities().Fastq(), b2.Qualities().Fastq());
+ self.assertEqual(b1.DeletionQV().Fastq(), b2.DeletionQV().Fastq());
+ self.assertEqual(b1.InsertionQV().Fastq(), b2.InsertionQV().Fastq());
+ self.assertEqual(b1.MergeQV().Fastq(), b2.MergeQV().Fastq());
+ self.assertEqual(b1.SubstitutionQV().Fastq(), b2.SubstitutionQV().Fastq());
+ self.assertEqual(b1.PulseMergeQV().Fastq(), b2.PulseMergeQV().Fastq());
+ self.assertEqual(b1.LabelQV().Fastq(), b2.LabelQV().Fastq());
+ self.assertEqual(b1.AltLabelQV().Fastq(), b2.AltLabelQV().Fastq());
+
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+import PacBioBam
+import config
+import unittest
+
+class QualityValueTest(unittest.TestCase):
+
+ # ------------ SETUP --------------
+
+ def runTest(self):
+ self.test_defaults()
+ self.test_fromNumber()
+ self.test_fromFastq()
+
+ # ------------ TESTS --------------
+
+ def test_defaults(self):
+ value = PacBioBam.QualityValue()
+ self.assertEqual(0, int(value))
+ self.assertEqual('!', value.Fastq())
+
+ def test_fromNumber(self):
+
+ zero = PacBioBam.QualityValue(0)
+ thirtythree = PacBioBam.QualityValue(33)
+ normal = PacBioBam.QualityValue(42)
+ maxQV = PacBioBam.QualityValue(93)
+ tooHigh = PacBioBam.QualityValue(94)
+ max8bit = PacBioBam.QualityValue(126)
+
+ self.assertEqual(0, int(zero))
+ self.assertEqual(33, int(thirtythree))
+ self.assertEqual(42, int(normal))
+ self.assertEqual(93, int(maxQV))
+ self.assertEqual(93, int(tooHigh))
+ self.assertEqual(93, int(max8bit))
+
+ self.assertEqual('!', zero.Fastq())
+ self.assertEqual('B', thirtythree.Fastq())
+ self.assertEqual('K', normal.Fastq())
+ self.assertEqual('~', maxQV.Fastq())
+ self.assertEqual('~', tooHigh.Fastq())
+ self.assertEqual('~', max8bit.Fastq())
+
+ def test_fromFastq(self):
+
+ zero = PacBioBam.QualityValue.FromFastq('!')
+ thirtythree = PacBioBam.QualityValue.FromFastq('B')
+ normal = PacBioBam.QualityValue.FromFastq('K')
+ maxQV = PacBioBam.QualityValue.FromFastq('~')
+
+ self.assertEqual(0, int(zero))
+ self.assertEqual(33, int(thirtythree))
+ self.assertEqual(42, int(normal))
+ self.assertEqual(93, int(maxQV))
+
+class QualityValuesTest(unittest.TestCase):
+
+ # ------------ SETUP --------------
+
+ def runTest(self):
+ self.test_defaults()
+ self.test_fromNumbers()
+ self.test_fromFastq()
+
+ # ------------ TESTS --------------
+
+ def test_defaults(self):
+ values = PacBioBam.QualityValues()
+ self.assertFalse(values.Fastq())
+
+ def test_fromNumbers(self):
+
+ fastqString = "~~~KKBB!!"
+ values = [ 93, 93, 93, 42, 42, 33, 33, 0, 0 ]
+
+ qvs = PacBioBam.QualityValues()
+ for value in values:
+ qvs.append(PacBioBam.QualityValue(value))
+
+ self.assertEqual(fastqString, qvs.Fastq())
+
+ def test_fromFastq(self):
+
+ fastqString = "~~~KKBB!!"
+ values = [ 93, 93, 93, 42, 42, 33, 33, 0, 0 ]
+
+ qvs = PacBioBam.QualityValues.FromFastq(fastqString)
+
+ self.assertEqual(len(fastqString), len(qvs))
+ self.assertEqual(len(values), len(qvs))
+
+ for i, v in enumerate(values):
+ self.assertEqual(v, int(qvs[i]))
+
\ No newline at end of file
--- /dev/null
+# Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted (subject to the limitations in the
+# disclaimer below) provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+#
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Author: Derek Barnett
+
+import sys
+import unittest
+
+if __name__ == "__main__":
+ suite = unittest.TestLoader().discover('.', pattern = "test_*.py")
+ result = unittest.TextTestRunner(verbosity=2).run(suite)
+ if result.wasSuccessful():
+ sys.exit(0)
+ else:
+ sys.exit(1)
+
\ No newline at end of file
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/Accuracy.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+TEST(AccuracyTest, ClampValues)
+{
+    // inputs at or below 0.0 are expected to read back as 0.0
+    Accuracy atZero(0.0);
+    EXPECT_FLOAT_EQ(0.0, atZero);
+
+    Accuracy negative(-0.5);
+    EXPECT_FLOAT_EQ(0.0, negative);
+
+    Accuracy atMinimum(0.0);
+    EXPECT_FLOAT_EQ(0.0, atMinimum);
+
+    // an in-range input is expected to be kept as given
+    Accuracy inRange(0.9);
+    EXPECT_FLOAT_EQ(0.9, inRange);
+
+    // inputs at or above 1.0 are expected to read back as 1.0
+    Accuracy atMaximum(1.0);
+    EXPECT_FLOAT_EQ(1.0, atMaximum);
+
+    Accuracy aboveRange(1.1);
+    EXPECT_FLOAT_EQ(1.0, aboveRange);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+
+#include <pbbam/AlignmentPrinter.h>
+#include <pbbam/BamFile.h>
+#include <pbbam/BamRecord.h>
+#include <pbbam/EntireFileQuery.h>
+#include <pbbam/IndexedFastaReader.h>
+#include <string>
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Input files used by the test below, both located under tests::Data_Dir:
+// the FASTA opened with IndexedFastaReader and the BAM whose records are printed.
+const string lambdaFasta = tests::Data_Dir + "/lambdaNEB.fa";
+const string singleInsertionBam = tests::Data_Dir + "/aligned.bam";
+
+TEST(AlignmentPrinterTest, Print)
+{
+ // Prints the first four records of singleInsertionBam with an
+ // AlignmentPrinter built on lambdaFasta and compares each result,
+ // character for character, against a hard-coded expected string.
+ IndexedFastaReader r(lambdaFasta);
+ AlignmentPrinter pretty(r);
+
+ BamFile bamFile(singleInsertionBam);
+ EntireFileQuery bamQuery(bamFile);
+ // NOTE(review): 'it' is dereferenced four times below without being
+ // compared against bamQuery.end() - presumably the BAM holds at least
+ // four records; confirm against the test data.
+ auto it = bamQuery.begin();
+
+ // funky formatting used to format alignments
+ // (the \x1B[...m runs are ANSI escape sequences - bold + red on, then
+ // reset - wrapped around individual '|' markers in the middle row)
+ auto expected = string
+ {
+ "Read : singleInsertion/100/0_49\n"
+ "Reference : lambda_NEB3011\n"
+ "\n"
+ "Read-length : 49\n"
+ "Concordance : 0.96\n"
+ "\n"
+ "5210 : GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGG : 5249\n"
+ " \x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||| |\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||| ||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||||\n"
+ " 0 : GGCTGCAG-GTACAGCGGTCAGGAGGCCAATTGATGCCGG : 39\n"
+ "\n"
+ "5249 : ACTGGCTGAT : 5259\n"
+ " |\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||||\n"
+ " 39 : ACTGGCTGAT : 49\n"
+ "\n"
+ };
+
+ // first record
+ auto record = *it++;
+ EXPECT_EQ(expected, pretty.Print(record, Orientation::GENOMIC));
+
+ // second record: same alignment text as the first, different read name
+ expected = string
+ {
+ "Read : singleInsertion/200/0_49\n"
+ "Reference : lambda_NEB3011\n"
+ "\n"
+ "Read-length : 49\n"
+ "Concordance : 0.96\n"
+ "\n"
+ "5210 : GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGG : 5249\n"
+ " \x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||| |\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||| ||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||||\n"
+ " 0 : GGCTGCAG-GTACAGCGGTCAGGAGGCCAATTGATGCCGG : 39\n"
+ "\n"
+ "5249 : ACTGGCTGAT : 5259\n"
+ " |\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||||\n"
+ " 39 : ACTGGCTGAT : 49\n"
+ "\n"
+ };
+
+ record = *it++;
+ EXPECT_EQ(expected, pretty.Print(record, Orientation::GENOMIC));
+
+ // third record
+ expected = string
+ {
+ "Read : singleInsertion/100/0_111\n"
+ "Reference : lambda_NEB3011\n"
+ "\n"
+ "Read-length : 59\n"
+ "Concordance : 0.951\n"
+ "\n"
+ "9377 : AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCG : 9417\n"
+ " |||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||| |\n"
+ " 0 : AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGA--G : 38\n"
+ "\n"
+ "9417 : CAGCACGGT-AACAGCGGCAA : 9437\n"
+ " |||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||| ||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||\n"
+ " 38 : CAGCACGGTAAACAGCGGCAA : 59\n"
+ "\n"
+ };
+
+ record = *it++;
+ EXPECT_EQ(expected, pretty.Print(record, Orientation::GENOMIC));
+
+ // fourth record: the expected text is identical to the third record's
+ expected = string
+ {
+ "Read : singleInsertion/100/0_111\n"
+ "Reference : lambda_NEB3011\n"
+ "\n"
+ "Read-length : 59\n"
+ "Concordance : 0.951\n"
+ "\n"
+ "9377 : AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCG : 9417\n"
+ " |||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m|||||||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||| |\n"
+ " 0 : AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGA--G : 38\n"
+ "\n"
+ "9417 : CAGCACGGT-AACAGCGGCAA : 9437\n"
+ " |||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||| ||||\x1B[1m\x1B[31m|\x1B[0m\x1B[39;49m||||||\n"
+ " 38 : CAGCACGGTAAACAGCGGCAA : 59\n"
+ "\n"
+ };
+
+ record = *it++;
+ EXPECT_EQ(expected, pretty.Print(record, Orientation::GENOMIC));
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/BamFile.h>
+#include <pbbam/EntireFileQuery.h>
+#include <pbbam/../../src/FileUtils.h>
+#include <stdexcept>
+#include <cstdlib>
+#include <unistd.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
+// Counts the records produced by an EntireFileQuery over 'input' and expects
+// that count to equal 'expectedCount'. Record contents are not inspected.
+template<typename T>
+void CheckFile(const T& input, const size_t expectedCount)
+{
+    size_t numRecords = 0;
+    EntireFileQuery query(input);
+
+    auto current = query.begin();
+    auto last = query.end();
+    for (; current != last; ++current) {
+        // bind each record exactly as a range-for would, but only tally it
+        const BamRecord& record = *current;
+        (void)record;
+        ++numRecords;
+    }
+
+    EXPECT_EQ(expectedCount, numRecords);
+}
+
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
+TEST(BamFileTest, NonExistentFileThrows)
+{
+    // opening a filename that does not exist must raise std::runtime_error
+    const string missingFn = "does_not_exist.bam";
+    EXPECT_THROW(BamFile{ missingFn }, std::runtime_error);
+}
+
+TEST(BamFileTest, NonBamFileThrows)
+{
+    // an existing file that is not a BAM (here a FASTA index) must be rejected
+    const string notBamFn = tests::Data_Dir + "/lambdaNEB.fa.fai";
+    EXPECT_THROW(BamFile{ notBamFn }, std::runtime_error);
+}
+
+TEST(BamFileTest, RelativePathBamOk)
+{
+    // Returns the process to the directory it started in once the test body
+    // is left. Doing this from a destructor (rather than a trailing ASSERT)
+    // means a failed ASSERT_* or an exception thrown mid-test cannot leave
+    // later tests running from inside the test data directory.
+    struct CwdRestorer
+    {
+        explicit CwdRestorer(const string& dir) : dir_(dir) { }
+        ~CwdRestorer()
+        {
+            if (chdir(dir_.c_str()) != 0)
+                ADD_FAILURE() << "could not restore working directory: " << dir_;
+        }
+        const string dir_;
+    };
+
+    // cache current working directory, then drill down so we can point to
+    // BAMs using relative path
+    const CwdRestorer restoreCwd(internal::FileUtils::CurrentWorkingDirectory());
+    ASSERT_EQ(0, chdir(tests::Data_Dir.c_str()));
+    ASSERT_EQ(0, chdir("relative/a"));
+
+    // BamFile from relative BAM fn
+    tests::CheckFile(BamFile{ "../b/test1.bam" }, 3);
+
+    // dataset from relative BAM fn
+    tests::CheckFile(DataSet{ "../b/test1.bam" }, 3);
+
+    // dataset from BamFile object (itself from relative BAM fn)
+    {
+        auto file = BamFile{"../b/test1.bam"};
+        tests::CheckFile(DataSet{ file }, 3);
+    }
+
+    // working directory is restored by restoreCwd's destructor
+}
+
+TEST(BamFileTest, RelativePathXmlOk)
+{
+    // Returns the process to the directory it started in once the test body
+    // is left. Doing this from a destructor (rather than a trailing ASSERT)
+    // means a failed ASSERT_* or an exception thrown mid-test cannot leave
+    // later tests running from inside the test data directory.
+    struct CwdRestorer
+    {
+        explicit CwdRestorer(const string& dir) : dir_(dir) { }
+        ~CwdRestorer()
+        {
+            if (chdir(dir_.c_str()) != 0)
+                ADD_FAILURE() << "could not restore working directory: " << dir_;
+        }
+        const string dir_;
+    };
+
+    // cache current working directory, then drill down so we can point to
+    // BAMs using relative path
+    const CwdRestorer restoreCwd(internal::FileUtils::CurrentWorkingDirectory());
+    ASSERT_EQ(0, chdir(tests::Data_Dir.c_str()));
+
+    // dataset from XML containing relative paths
+    tests::CheckFile(DataSet{ "relative/relative.xml" }, 9);
+
+    // working directory is restored by restoreCwd's destructor
+}
+
+TEST(BamFileTest, RelativePathFofnOk)
+{
+    // Returns the process to the directory it started in once the test body
+    // is left. Doing this from a destructor (rather than a trailing ASSERT)
+    // means a failed ASSERT_* or an exception thrown mid-test cannot leave
+    // later tests running from inside the test data directory.
+    struct CwdRestorer
+    {
+        explicit CwdRestorer(const string& dir) : dir_(dir) { }
+        ~CwdRestorer()
+        {
+            if (chdir(dir_.c_str()) != 0)
+                ADD_FAILURE() << "could not restore working directory: " << dir_;
+        }
+        const string dir_;
+    };
+
+    // cache current working directory, then drill down so we can point to
+    // BAMs using relative path
+    const CwdRestorer restoreCwd(internal::FileUtils::CurrentWorkingDirectory());
+    ASSERT_EQ(0, chdir(tests::Data_Dir.c_str()));
+
+    // dataset from FOFN containing relative paths
+    tests::CheckFile(DataSet{ "relative/relative.fofn" }, 9);
+
+    // NOTE: doesn't yet support a FOFN containing an XML with relative paths
+//    tests::CheckFile(DataSet{ "relative/relative2.fofn" }, 60);
+
+    // working directory is restored by restoreCwd's destructor
+}
+
+TEST(BamFileTest, TruncatedFileThrowsOk)
+{
+    // opening the truncated BAM from the generated-data directory must throw
+    const string truncatedFn = tests::GeneratedData_Dir + "/truncated.bam";
+    EXPECT_THROW(BamFile{ truncatedFn }, std::runtime_error);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <htslib/sam.h>
+#include <pbbam/BamHeader.h>
+#include <iostream>
+#include <string>
+#include <utility>
+#include <vector>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
+// Deleter for smart pointers that own a raw htslib header.
+struct BamHdrDeleter
+{
+    // Releases 'hdr' through bam_hdr_destroy() when it is non-null.
+    // The pointer is received by value, so nothing is (or can be) reset in
+    // the caller; the owning smart pointer handles that itself.
+    void operator()(bam_hdr_t* hdr) const {
+        if (hdr)
+            bam_hdr_destroy(hdr);
+    }
+};
+
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
+TEST(BamHeaderTest, DefaultConstruction)
+{
+    BamHeader header{};
+
+    // a default-constructed header reports empty fields and empty collections
+    EXPECT_TRUE(header.Version().empty());
+    EXPECT_TRUE(header.SortOrder().empty()); // default to unknown ?
+    EXPECT_TRUE(header.ReadGroups().empty());
+    EXPECT_TRUE(header.Sequences().empty());
+    EXPECT_TRUE(header.Programs().empty());
+    EXPECT_TRUE(header.Comments().empty());
+
+    // lookups by ID, name or index are expected to throw
+    EXPECT_THROW(header.Program("foo"), std::exception);
+    EXPECT_THROW(header.ReadGroup("foo"), std::exception);
+    EXPECT_THROW(header.SequenceId("foo"), std::exception);
+    EXPECT_THROW(header.SequenceLength(42), std::exception);
+    EXPECT_THROW(header.SequenceName(42), std::exception);
+}
+
+TEST(BamHeaderTest, DecodeTest)
+{
+    // SAM header text with one @HD line, two @SQ, three @RG, one @PG, two @CO
+    const string samText =
+        "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
+        "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
+        "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
+        "@RG\tID:rg1\tSM:control\n"
+        "@RG\tID:rg2\tSM:condition1\n"
+        "@RG\tID:rg3\tSM:condition1\n"
+        "@PG\tID:_foo_\tPN:ide\n"
+        "@CO\tipsum and so on\n"
+        "@CO\tcitation needed\n";
+
+    BamHeader header(samText);
+
+    // @HD fields
+    EXPECT_EQ(string("1.1"), header.Version());
+    EXPECT_EQ(string("queryname"), header.SortOrder());
+    EXPECT_EQ(string("3.0.1"), header.PacBioBamVersion());
+
+    // @RG entries
+    EXPECT_EQ(3, header.ReadGroups().size());
+    EXPECT_TRUE(header.HasReadGroup("rg1"));
+    EXPECT_TRUE(header.HasReadGroup("rg2"));
+    EXPECT_TRUE(header.HasReadGroup("rg3"));
+
+    EXPECT_EQ(string("control"), header.ReadGroup("rg1").Sample());
+    EXPECT_EQ(string("condition1"), header.ReadGroup("rg2").Sample());
+    EXPECT_EQ(string("condition1"), header.ReadGroup("rg3").Sample());
+
+    // @SQ entries (the length is compared as text)
+    EXPECT_EQ(2, header.Sequences().size());
+    EXPECT_TRUE(header.HasSequence("chr1"));
+    EXPECT_TRUE(header.HasSequence("chr2"));
+    EXPECT_EQ(string("chocobo"), header.Sequence("chr1").Species());
+    EXPECT_EQ(string("chocobo"), header.Sequence("chr2").Species());
+    EXPECT_EQ(string("2038"), header.Sequence("chr1").Length());
+    EXPECT_EQ(string("3042"), header.Sequence("chr2").Length());
+
+    // @PG entries
+    EXPECT_EQ(1, header.Programs().size());
+    EXPECT_TRUE(header.HasProgram("_foo_"));
+    EXPECT_EQ(string("ide"), header.Program("_foo_").Name());
+
+    // @CO entries, in input order
+    EXPECT_EQ(2, header.Comments().size());
+    EXPECT_EQ(string("ipsum and so on"), header.Comments().at(0));
+    EXPECT_EQ(string("citation needed"), header.Comments().at(1));
+}
+
+TEST(BamHeaderTest, VersionCheckOk)
+{
+    // Header texts that must be rejected; they differ only in the "pb:" field.
+    struct RejectedCase
+    {
+        const char* label;
+        const char* text;
+    };
+    const RejectedCase rejected[] = {
+        { "empty version",        "@HD\tVN:1.1\tSO:queryname\tpb:\n" },
+        { "old beta version",     "@HD\tVN:1.1\tSO:queryname\tpb:3.0b3\n" },
+        { "old beta version",     "@HD\tVN:1.1\tSO:queryname\tpb:3.0b7\n" },
+        { "invalid value",        "@HD\tVN:1.1\tSO:queryname\tpb:3.0.should_not_work\n" },
+        { "earlier than minimum", "@HD\tVN:1.1\tSO:queryname\tpb:3.0.0\n" }
+    };
+
+    for (const RejectedCase& entry : rejected) {
+        const string label = entry.label;
+        const string text = entry.text;
+        // the label shows up in the failure output to identify the case
+        SCOPED_TRACE(label);
+        EXPECT_THROW(BamHeader{ text }, std::runtime_error);
+    }
+
+    // correct version syntax, number
+    EXPECT_NO_THROW(BamHeader{ "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n" });
+}
+
+TEST(BamHeaderTest, EncodeTest)
+{
+    // read groups: only the sample is set explicitly
+    ReadGroupInfo rg1("rg1");
+    ReadGroupInfo rg2("rg2");
+    ReadGroupInfo rg3("rg3");
+    rg1.Sample("control");
+    rg2.Sample("condition1");
+    rg3.Sample("condition1");
+
+    // reference sequences
+    SequenceInfo seq1("chr1");
+    seq1.Length("2038");
+    seq1.Species("chocobo");
+    SequenceInfo seq2("chr2");
+    seq2.Length("3042");
+    seq2.Species("chocobo");
+
+    // program record
+    ProgramInfo prog1("_foo_");
+    prog1.Name("ide");
+
+    // assemble the header one setter at a time
+    BamHeader header;
+    header.Version("1.1");
+    header.SortOrder("queryname");
+    header.PacBioBamVersion("3.0.1");
+    header.AddReadGroup(rg1);
+    header.AddReadGroup(rg2);
+    header.AddReadGroup(rg3);
+    header.AddSequence(seq1);
+    header.AddSequence(seq2);
+    header.AddProgram(prog1);
+    header.AddComment("ipsum and so on");
+    header.AddComment("citation needed");
+
+    // the expected @RG lines also carry PL, DS and PM fields that this test
+    // never set on the read groups
+    const string expectedText =
+        "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
+        "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
+        "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
+        "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
+        "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
+        "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
+        "@PG\tID:_foo_\tPN:ide\n"
+        "@CO\tipsum and so on\n"
+        "@CO\tcitation needed\n";
+
+    const string encoded = header.ToSam();
+    EXPECT_EQ(expectedText, encoded);
+}
+
+TEST(BamHeaderTest, ConvertToRawDataOk)
+{
+    // Builds a BamHeader, serialises it with ToSam(), loads that text into a
+    // raw htslib bam_hdr_t and checks that the text stored there matches the
+    // expected SAM header.
+    ReadGroupInfo rg1("rg1");
+    rg1.Sample("control");
+    ReadGroupInfo rg2("rg2");
+    rg2.Sample("condition1");
+    ReadGroupInfo rg3("rg3");
+    rg3.Sample("condition1");
+
+    SequenceInfo seq1("chr1");
+    seq1.Length("2038").Species("chocobo");
+    SequenceInfo seq2("chr2");
+    seq2.Length("3042").Species("chocobo");
+
+    ProgramInfo prog1("_foo_");
+    prog1.Name("ide");
+
+    BamHeader header;
+    header.Version("1.1")
+          .SortOrder("queryname")
+          .PacBioBamVersion("3.0.1")
+          .AddReadGroup(rg1)
+          .AddReadGroup(rg2)
+          .AddReadGroup(rg3)
+          .AddSequence(seq1)
+          .AddSequence(seq2)
+          .AddProgram(prog1)
+          .AddComment("ipsum and so on")
+          .AddComment("citation needed");
+
+    const string& expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
+                                 "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
+                                 "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
+                                 "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
+                                 "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
+                                 "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
+                                 "@PG\tID:_foo_\tPN:ide\n"
+                                 "@CO\tipsum and so on\n"
+                                 "@CO\tcitation needed\n";
+
+
+    const string& text = header.ToSam();
+    PBBAM_SHARED_PTR<bam_hdr_t> rawData(sam_hdr_parse(text.size(), text.c_str()), tests::BamHdrDeleter());
+
+    // stop here (test failure) instead of dereferencing a null header
+    ASSERT_TRUE(rawData.get() != nullptr);
+
+    rawData->ignore_sam_err = 0;
+    rawData->cigar_tab = nullptr;
+    rawData->l_text = text.size();
+    // one extra zeroed byte keeps the copied text NUL-terminated
+    rawData->text = static_cast<char*>(calloc(rawData->l_text + 1, 1));
+    ASSERT_TRUE(rawData->text != nullptr);
+    memcpy(rawData->text, text.c_str(), rawData->l_text);
+
+    const string& rawText = string(rawData->text, rawData->l_text);
+    EXPECT_EQ(expectedText, rawText);
+}
+
+TEST(BamHeaderTest, ExtractFromRawDataOk)
+{
+    // Builds a BamHeader, pushes its SAM text through a raw htslib bam_hdr_t,
+    // constructs a second BamHeader from the text stored there and checks
+    // that the two headers agree and that the round-tripped text is the
+    // expected SAM header.
+    ReadGroupInfo rg1("rg1");
+    rg1.Sample("control");
+    ReadGroupInfo rg2("rg2");
+    rg2.Sample("condition1");
+    ReadGroupInfo rg3("rg3");
+    rg3.Sample("condition1");
+
+    SequenceInfo seq1("chr1");
+    seq1.Length("2038").Species("chocobo");
+    SequenceInfo seq2("chr2");
+    seq2.Length("3042").Species("chocobo");
+
+    ProgramInfo prog1("_foo_");
+    prog1.Name("ide");
+
+    BamHeader header;
+    header.Version("1.1")
+          .SortOrder("queryname")
+          .PacBioBamVersion("3.0.1")
+          .AddReadGroup(rg1)
+          .AddReadGroup(rg2)
+          .AddReadGroup(rg3)
+          .AddSequence(seq1)
+          .AddSequence(seq2)
+          .AddProgram(prog1)
+          .AddComment("ipsum and so on")
+          .AddComment("citation needed");
+
+    const string& expectedText = "@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
+                                 "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
+                                 "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
+                                 "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
+                                 "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
+                                 "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
+                                 "@PG\tID:_foo_\tPN:ide\n"
+                                 "@CO\tipsum and so on\n"
+                                 "@CO\tcitation needed\n";
+
+
+    string text = header.ToSam();
+    PBBAM_SHARED_PTR<bam_hdr_t> rawData(sam_hdr_parse(text.size(), text.c_str()), tests::BamHdrDeleter());
+
+    // stop here (test failure) instead of dereferencing a null header
+    ASSERT_TRUE(rawData.get() != nullptr);
+
+    rawData->ignore_sam_err = 0;
+    rawData->cigar_tab = nullptr;
+    rawData->l_text = text.size();
+    // one extra zeroed byte keeps the copied text NUL-terminated
+    rawData->text = static_cast<char*>(calloc(rawData->l_text + 1, 1));
+    ASSERT_TRUE(rawData->text != nullptr);
+    memcpy(rawData->text, text.c_str(), rawData->l_text);
+
+    const BamHeader newHeader = BamHeader(string(rawData->text, rawData->l_text));
+
+    EXPECT_EQ(header.Version(), newHeader.Version());
+    EXPECT_EQ(header.SortOrder(), newHeader.SortOrder());
+    EXPECT_EQ(header.PacBioBamVersion(), newHeader.PacBioBamVersion());
+
+    // re-encode the reconstructed header and compare against the expected text
+    text = newHeader.ToSam();
+    EXPECT_EQ(expectedText, text);
+}
+
+// Merges two headers that carry different read groups, programs and comments,
+// and checks the SAM text of the result. Both operator+ and operator+= are
+// exercised against the same expected text.
+TEST(BamHeaderTest, MergeOk)
+{
+ // first input: one SUBREAD read group, two @PG entries, one comment
+ const string hdrText1 = {
+ "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
+ "@RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
+ "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
+ "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\t"
+ "PU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0\t"
+ "PM:SEQUEL\n"
+ "@PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0\n"
+ "@PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2\n"
+ "@CO\tcomment1\n"
+ };
+
+ // second input: same @HD line, one SCRAP read group, three @PG entries, one comment
+ const string hdrText2 = {
+ "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
+ "@RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;"
+ "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;"
+ "PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;"
+ "PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;"
+ "BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;"
+ "FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\t"
+ "PM:SEQUEL\n"
+ "@PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0\n"
+ "@PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0\n"
+ "@PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0\n"
+ "@CO\tcomment2\n"
+ };
+
+ // expected merge: a single @HD line, then both @RG entries, all five @PG
+ // entries and both comments, each group listing header1's records first
+ const string mergedText = {
+ "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
+ "@RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
+ "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
+ "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\t"
+ "PU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0\t"
+ "PM:SEQUEL\n"
+ "@RG\tID:e83fc9c6\tPL:PACBIO\tDS:READTYPE=SCRAP;DeletionQV=dq;DeletionTag=dt;"
+ "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;SubstitutionTag=st;Ipd:Frames=ip;"
+ "PulseWidth:Frames=pw;PkMid=pm;PkMean=pa;LabelQV=pq;AltLabel=pt;AltLabelQV=pv;"
+ "PulseMergeQV=pg;PulseCall=pc;PrePulseFrames=pd;PulseCallWidth=px;"
+ "BINDINGKIT=100372700;SEQUENCINGKIT=100356200;BASECALLERVERSION=0.1;"
+ "FRAMERATEHZ=100.000000\tPU:ArminsFakeMovie\t"
+ "PM:SEQUEL\n"
+ "@PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0\n"
+ "@PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2\n"
+ "@PG\tID:baz2bam-0.15.0\tPN:baz2bam\tVN:0.15.0\n"
+ "@PG\tID:bazFormat-0.3.0\tPN:bazFormat\tVN:0.3.0\n"
+ "@PG\tID:bazwriter-0.15.0\tPN:bazwriter\tVN:0.15.0\n"
+ "@CO\tcomment1\n"
+ "@CO\tcomment2\n"
+ };
+
+ { // operator+
+
+ const BamHeader header1(hdrText1);
+ const BamHeader header2(hdrText2);
+ const BamHeader merged = header1 + header2;
+ EXPECT_EQ(mergedText, merged.ToSam());
+
+ // also make sure inputs not changed
+ EXPECT_EQ(hdrText1, header1.ToSam());
+ EXPECT_EQ(hdrText2, header2.ToSam());
+ }
+
+ { // operator+=
+
+ // in-place merge must produce the same text as operator+
+ BamHeader header1(hdrText1);
+ header1 += BamHeader(hdrText2);
+ EXPECT_EQ(mergedText, header1.ToSam());
+ }
+}
+
+// Merges a header with an identical copy of itself and expects the SAM text
+// to come back unchanged, i.e. the shared @RG and @PG records appear once.
+TEST(BamHeaderTest, MergeHandlesDuplicateReadGroups)
+{
+ // single read group plus two @PG entries; used for both operands
+ const string hdrText = {
+ "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
+ "@RG\tID:a955def6\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
+ "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
+ "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.3.0.0.140018;FRAMERATEHZ=75.000000\t"
+ "PU:m140918_150013_42139_c100697631700000001823144703261565_s1_p0\tPM:SEQUEL\n"
+ "@PG\tID:bam2bam-0.20.0\tPN:bam2bam\tVN:0.20.0\n"
+ "@PG\tID:bax2bam-0.0.2\tPN:bax2bam\tVN:0.0.2\n"
+ };
+
+ // duplicate @RG:IDs handled ok (i.e. not duplicated in output)
+ const BamHeader header1(hdrText);
+ const BamHeader header2(hdrText);
+ const BamHeader merged = header1 + header2;
+ EXPECT_EQ(hdrText, merged.ToSam());
+}
+
+// Checks which header differences operator+ tolerates and which it rejects:
+//   - differing @HD:VN  -> merge succeeds
+//   - differing @HD:SO  -> std::runtime_error
+//   - differing @HD:pb  -> merge succeeds
+//   - differing @SQ set -> std::runtime_error
+TEST(BamHeaderTest, MergeCompatibilityOk)
+{
+ { // different @HD:VN - this IS allowed (as of SAT-465, pbbam v0.7.2)
+ const string hdrText1 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n" };
+ const string hdrText2 = { "@HD\tVN:1.0\tSO:unknown\tpb:3.0.1\n" };
+ const BamHeader header1(hdrText1);
+ const BamHeader header2(hdrText2);
+ EXPECT_NO_THROW(header1 + header2);
+ }
+
+ { // different @HD:SO
+ // sort orders "unknown" vs "coordinate" must be rejected
+ const string hdrText1 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n" };
+ const string hdrText2 = { "@HD\tVN:1.1\tSO:coordinate\tpb:3.0.1\n" };
+ const BamHeader header1(hdrText1);
+ const BamHeader header2(hdrText2);
+ EXPECT_THROW(header1 + header2, std::runtime_error);
+ }
+
+ { // different @HD:pb - this IS allowed (as of SAT-529, pbbam 0.7.4)
+ const string hdrText1 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n" };
+ const string hdrText2 = { "@HD\tVN:1.1\tSO:unknown\tpb:3.0.3\n" };
+ const BamHeader header1(hdrText1);
+ const BamHeader header2(hdrText2);
+ EXPECT_NO_THROW(header1 + header2);
+ }
+
+ { // @SQ list clash
+ // both headers share "foo" but differ in their second sequence (bar vs baz)
+ const string hdrText1 = {
+ "@HD\tVN:1.1\tSO:coordinate\tpb:3.0.1\n"
+ "@SQ\tSN:foo\tLN:42\n"
+ "@SQ\tSN:bar\tLN:24\n"
+ };
+ const string hdrText2 = {
+ "@HD\tVN:1.1\tSO:coordinate\tpb:3.0.1\n"
+ "@SQ\tSN:foo\tLN:42\n"
+ "@SQ\tSN:baz\tLN:99\n"
+ };
+ const BamHeader header1(hdrText1);
+ const BamHeader header2(hdrText2);
+ EXPECT_THROW(header1 + header2, std::runtime_error);
+ }
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/BamRecord.h>
+#include <pbbam/BamTagCodec.h>
+#include <array>
+#include <initializer_list>
+#include <string>
+#include <vector>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace tests {
+
+// Builds the fixture record shared by the tests: every core numeric field is
+// set to 42, and the record carries three tags -- "HX" (string flagged with
+// the HEX_STRING modifier), "CA" (uint8_t array) and "XY" (int32_t).
+static
+BamRecordImpl CreateBamImpl(void)
+{
+    // core fields first
+    BamRecordImpl record;
+    record.Bin(42);
+    record.Flag(42);
+    record.InsertSize(42);
+    record.MapQuality(42);
+    record.MatePosition(42);
+    record.MateReferenceId(42);
+    record.Position(42);
+    record.ReferenceId(42);
+
+    // then the tag payload
+    TagCollection tagData;
+    tagData["HX"] = string("1abc75");
+    tagData["HX"].Modifier(TagModifier::HEX_STRING);
+    tagData["CA"] = vector<uint8_t>{34, 5, 125};
+    tagData["XY"] = static_cast<int32_t>(-42);
+    record.Tags(tagData);
+
+    return record;
+}
+
+// Wraps the CreateBamImpl() fixture in a BamRecord.
+static inline
+BamRecord CreateBam(void)
+{
+    BamRecordImpl impl = CreateBamImpl();
+    return BamRecord{ std::move(impl) };
+}
+
+// Cross-checks the lengths stored in the record's raw data (bam.d_) against
+// the values reported through the BamRecordImpl accessors.
+// Reads the d_ member directly; NOTE(review): this presumably relies on the
+// `#define private public` near the top of this file (PBBAM_TESTING builds).
+static
+void CheckRawData(const BamRecordImpl& bam)
+{
+ // ensure raw data (lengths at least) matches API-facing data
+ // NOTE(review): the +1 presumably covers a terminating NUL stored with the name - confirm
+ const uint32_t expectedNameLength = bam.Name().size() + 1;
+ const uint32_t expectedNumCigarOps = bam.CigarData().size();
+ const int32_t expectedSeqLength = bam.Sequence().length();
+ const size_t expectedTagsLength = BamTagCodec::Encode(bam.Tags()).size();
+
+ // expected total size of the variable-length data, summed per section:
+ //   Name     : l_qname
+ //   CIGAR    : n_cigar * 4
+ //   Sequence : (l_qseq+1)/2
+ //   Quals    : l_qseq
+ //   Tags     : size of the encoded tag data
+ const int expectedTotalDataLength = expectedNameLength +
+ (expectedNumCigarOps * 4) +
+ (expectedSeqLength+1)/2 +
+ expectedSeqLength +
+ expectedTagsLength;
+ // the raw record must exist before its fields are inspected
+ EXPECT_TRUE((bool)bam.d_);
+ EXPECT_EQ(expectedNameLength, bam.d_->core.l_qname);
+ EXPECT_EQ(expectedNumCigarOps, bam.d_->core.n_cigar);
+ EXPECT_EQ(expectedSeqLength, bam.d_->core.l_qseq);
+ EXPECT_EQ(expectedTotalDataLength, bam.d_->l_data);
+}
+
+// BamRecord overload: forwards the record's underlying impl_ to the
+// BamRecordImpl overload of CheckRawData.
+static inline
+void CheckRawData(const BamRecord& bam)
+{
+    const BamRecordImpl& impl = bam.impl_;
+    CheckRawData(impl);
+}
+
+// Builds a mapped BamRecordImpl (reference 0, position 0, map quality 0, no
+// mate) from a sequence and a CIGAR string. Qualities are filled with one '*'
+// per base; the reverse-strand flag is set when `strand` is Strand::REVERSE.
+static
+BamRecordImpl MakeCigaredImpl(const string& seq,
+                              const string& cigar,
+                              const Strand strand)
+{
+    const string placeholderQuals(seq.size(), '*');
+    const bool onReverseStrand = (strand == Strand::REVERSE);
+
+    BamRecordImpl impl;
+
+    // alignment of the read itself
+    impl.SetMapped(true);
+    impl.ReferenceId(0);
+    impl.Position(0);
+    impl.MapQuality(0);
+    impl.CigarData(Cigar::FromStdString(cigar));
+
+    // no mate information
+    impl.MateReferenceId(-1);
+    impl.MatePosition(-1);
+    impl.InsertSize(0);
+
+    impl.SetSequenceAndQualities(seq, placeholderQuals);
+    impl.SetReverseStrand(onReverseStrand);
+    return impl;
+}
+
+// Wraps MakeCigaredImpl(seq, cigar, strand) in a BamRecord.
+static inline
+BamRecord MakeCigaredRecord(const string& seq,
+                            const string& cigar,
+                            const Strand strand)
+{
+    BamRecordImpl impl = MakeCigaredImpl(seq, cigar, strand);
+    return BamRecord{ std::move(impl) };
+}
+
+// Builds an aligned record whose "dt" and "st" tags both hold `bases`.
+// The record's sequence is a run of 'N' with the same length as `bases`.
+static
+BamRecord MakeCigaredBaseRecord(const string& bases,
+                                const string& cigar,
+                                const Strand strand)
+{
+    BamRecordImpl impl = MakeCigaredImpl(string(bases.size(), 'N'), cigar, strand);
+
+    TagCollection baseTags;
+    baseTags["dt"] = bases;
+    baseTags["st"] = bases;
+    impl.Tags(baseTags);
+
+    return BamRecord(std::move(impl));
+}
+
+// Builds an aligned record whose "ip" and "pw" tags both hold `frames`.
+// The record's sequence is a run of 'N' with one base per frame value.
+static
+BamRecord MakeCigaredFrameRecord(const vector<uint16_t>& frames,
+                                 const string& cigar,
+                                 const Strand strand)
+{
+    BamRecordImpl impl = MakeCigaredImpl(string(frames.size(), 'N'), cigar, strand);
+
+    TagCollection frameTags;
+    frameTags["ip"] = frames;
+    frameTags["pw"] = frames;
+    impl.Tags(frameTags);
+
+    return BamRecord(std::move(impl));
+}
+
+// Builds an aligned record whose "dq", "iq", "mq" and "sq" tags all hold
+// `quals`. The record's sequence is a run of 'N' of the same length.
+static
+BamRecord MakeCigaredQualRecord(const string& quals,
+                                const string& cigar,
+                                const Strand strand)
+{
+    BamRecordImpl impl = MakeCigaredImpl(string(quals.size(), 'N'), cigar, strand);
+
+    TagCollection qualTags;
+    qualTags["dq"] = quals;
+    qualTags["iq"] = quals;
+    qualTags["mq"] = quals;
+    qualTags["sq"] = quals;
+    impl.Tags(qualTags);
+
+    return BamRecord(std::move(impl));
+}
+
+// Builds an aligned record with sequence `seqBases`, storing `pulseCalls`
+// under the "pc" tag and `pulseBases` under the "pt" tag.
+static
+BamRecord MakeCigaredPulseBaseRecord(const string& seqBases,
+                                     const string& pulseCalls,
+                                     const string& pulseBases,
+                                     const string& cigar,
+                                     const Strand strand)
+{
+    BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
+
+    TagCollection pulseTags;
+    pulseTags["pc"] = pulseCalls;   // PulseCall
+    pulseTags["pt"] = pulseBases;   // AltLabelTag
+    impl.Tags(pulseTags);
+
+    return BamRecord(std::move(impl));
+}
+
+// Builds an aligned record with sequence `seqBases`, storing `pulseCalls`
+// under "pc" and `pulseQuals` under each of "pv", "pq" and "pg".
+static
+BamRecord MakeCigaredPulseQualRecord(const string& seqBases,
+                                     const string& pulseCalls,
+                                     const string& pulseQuals,
+                                     const string& cigar,
+                                     const Strand strand)
+{
+    BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
+
+    TagCollection pulseTags;
+    pulseTags["pc"] = pulseCalls;
+    pulseTags["pv"] = pulseQuals;   // AltLabelQV
+    pulseTags["pq"] = pulseQuals;   // LabelQV
+    pulseTags["pg"] = pulseQuals;   // PulseMergeQV
+    impl.Tags(pulseTags);
+
+    return BamRecord(std::move(impl));
+}
+
+// Builds an aligned record with sequence `seqBases`, storing `pulseCalls`
+// under "pc" and `pulseFrames` under both "pd" and "px".
+static
+BamRecord MakeCigaredPulseFrameRecord(const string& seqBases,
+                                      const string& pulseCalls,
+                                      const vector<uint16_t>& pulseFrames,
+                                      const string& cigar,
+                                      const Strand strand)
+{
+    BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
+
+    TagCollection pulseTags;
+    pulseTags["pc"] = pulseCalls;
+    pulseTags["pd"] = pulseFrames;  // PrePulseFrames
+    pulseTags["px"] = pulseFrames;  // PulseCallWidth
+    impl.Tags(pulseTags);
+
+    return BamRecord(std::move(impl));
+}
+
+// Builds an aligned record with sequence `seqBases`, storing `pulseCalls`
+// under "pc" and `pulseUInts` under "sf".
+static
+BamRecord MakeCigaredPulseUIntRecord(const string& seqBases,
+                                     const string& pulseCalls,
+                                     const vector<uint32_t>& pulseUInts,
+                                     const string& cigar,
+                                     const Strand strand)
+{
+    BamRecordImpl impl = MakeCigaredImpl(seqBases, cigar, strand);
+
+    TagCollection pulseTags;
+    pulseTags["pc"] = pulseCalls;
+    pulseTags["sf"] = pulseUInts;   // StartFrame
+    impl.Tags(pulseTags);
+
+    return BamRecord(std::move(impl));
+}
+
+// ----------------------------------------------------------
+// helper structs and methods for checking combinations of:
+// aligned strand, orientation requested, alignment, clipping
+// ----------------------------------------------------------
+
+// Holder for the twelve expected values of one align/clip check.
+// Values are supplied as an initializer list in this fixed order:
+//    0..5  : forward strand -- Genomic, Native, GenomicAligned, NativeAligned,
+//                              GenomicAlignedClipped, NativeAlignedClipped
+//    6..11 : reverse strand -- same six states, same order
+// The constructor asserts that exactly twelve values were given.
+template<typename T>
+struct ExpectedResult
+{
+    ExpectedResult(std::initializer_list<T> init)
+        : d_(init)
+    {
+        assert(12 == init.size());
+    }
+
+    // forward-strand expectations
+    T ForwardGenomic(void) const               { return Slot(0); }
+    T ForwardNative(void) const                { return Slot(1); }
+    T ForwardGenomicAligned(void) const        { return Slot(2); }
+    T ForwardNativeAligned(void) const         { return Slot(3); }
+    T ForwardGenomicAlignedClipped(void) const { return Slot(4); }
+    T ForwardNativeAlignedClipped(void) const  { return Slot(5); }
+
+    // reverse-strand expectations
+    T ReverseGenomic(void) const               { return Slot(6); }
+    T ReverseNative(void) const                { return Slot(7); }
+    T ReverseGenomicAligned(void) const        { return Slot(8); }
+    T ReverseNativeAligned(void) const         { return Slot(9); }
+    T ReverseGenomicAlignedClipped(void) const { return Slot(10); }
+    T ReverseNativeAlignedClipped(void) const  { return Slot(11); }
+
+private:
+    // bounds-checked copy of the value stored at `index`
+    T Slot(const size_t index) const { return d_.at(index); }
+
+    vector<T> d_;
+};
+
+// Generic checker: builds a record on each strand and compares the fetched
+// data against the expected value for every orientation/aligned/clipped state.
+//
+//   cigar      - CIGAR string handed to makeRecord
+//   input      - raw data handed to makeRecord
+//   e          - the twelve expected values (six per strand)
+//   makeRecord - callable (input, cigar, strand) returning a BamRecord
+//   fetchData  - callable (record, orientation, aligned, exciseSoftClips)
+//                returning a value comparable with DataType
+template<typename DataType, typename MakeRecordType, typename FetchDataType>
+void CheckAlignAndClip(const string& cigar,
+ const DataType& input,
+ const tests::ExpectedResult<DataType>& e,
+ const MakeRecordType& makeRecord,
+ const FetchDataType& fetchData)
+{
+ { // map to forward strand
+ const BamRecord b = makeRecord(input, cigar, Strand::FORWARD);
+ // the two bool arguments are (aligned, exciseSoftClips)
+ EXPECT_EQ(e.ForwardGenomic(), fetchData(b, Orientation::GENOMIC, false, false));
+ EXPECT_EQ(e.ForwardNative(), fetchData(b, Orientation::NATIVE, false, false));
+ EXPECT_EQ(e.ForwardGenomicAligned(), fetchData(b, Orientation::GENOMIC, true, false));
+ EXPECT_EQ(e.ForwardNativeAligned(), fetchData(b, Orientation::NATIVE, true, false));
+ EXPECT_EQ(e.ForwardGenomicAlignedClipped(), fetchData(b, Orientation::GENOMIC, true, true));
+ EXPECT_EQ(e.ForwardNativeAlignedClipped(), fetchData(b, Orientation::NATIVE, true, true));
+ }
+ { // map to reverse strand
+ const BamRecord b = makeRecord(input, cigar, Strand::REVERSE);
+ EXPECT_EQ(e.ReverseGenomic(), fetchData(b, Orientation::GENOMIC, false, false));
+ EXPECT_EQ(e.ReverseNative(), fetchData(b, Orientation::NATIVE, false, false));
+ EXPECT_EQ(e.ReverseGenomicAligned(), fetchData(b, Orientation::GENOMIC, true, false));
+ EXPECT_EQ(e.ReverseNativeAligned(), fetchData(b, Orientation::NATIVE, true, false));
+ EXPECT_EQ(e.ReverseGenomicAlignedClipped(), fetchData(b, Orientation::GENOMIC, true, true));
+ EXPECT_EQ(e.ReverseNativeAlignedClipped(), fetchData(b, Orientation::NATIVE, true, true));
+ }
+}
+
+// Pulse-data variant of CheckAlignAndClip. For each strand it checks:
+//   - PulseBehavior::ALL            : genomic and native orientation only
+//   - PulseBehavior::BASECALLS_ONLY : all six orientation/aligned/clipped states
+//
+//   cigar, seqBases, pulseCalls, input - forwarded to makeRecord
+//   allPulses     - expected values for PulseBehavior::ALL (only the
+//                   Genomic/Native entries of each strand are read here)
+//   basecallsOnly - expected values for PulseBehavior::BASECALLS_ONLY
+//   makeRecord    - callable (seqBases, pulseCalls, input, cigar, strand)
+//                   returning a BamRecord
+//   fetchData     - callable (record, orientation, aligned, exciseSoftClips,
+//                   pulseBehavior) returning a value comparable with DataType
+template<typename DataType, typename MakeRecordType, typename FetchDataType>
+void CheckPulseDataAlignAndClip(const string& cigar,
+ const string& seqBases,
+ const string& pulseCalls,
+ const DataType& input,
+ const tests::ExpectedResult<DataType>& allPulses,
+ const tests::ExpectedResult<DataType>& basecallsOnly,
+ const MakeRecordType& makeRecord,
+ const FetchDataType& fetchData)
+{
+ { // map to forward strand
+ const BamRecord b = makeRecord(seqBases, pulseCalls, input, cigar, Strand::FORWARD);
+
+ EXPECT_EQ(allPulses.ForwardGenomic(), fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::ALL));
+ EXPECT_EQ(allPulses.ForwardNative(), fetchData(b, Orientation::NATIVE, false, false, PulseBehavior::ALL));
+ // no align/clipping operations available on ALL pulses
+
+ EXPECT_EQ(basecallsOnly.ForwardGenomic(), fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ForwardNative(), fetchData(b, Orientation::NATIVE, false, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ForwardGenomicAligned(), fetchData(b, Orientation::GENOMIC, true, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ForwardNativeAligned(), fetchData(b, Orientation::NATIVE, true, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ForwardGenomicAlignedClipped(), fetchData(b, Orientation::GENOMIC, true, true, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ForwardNativeAlignedClipped(), fetchData(b, Orientation::NATIVE, true, true, PulseBehavior::BASECALLS_ONLY));
+ }
+ { // map to reverse strand
+ const BamRecord b = makeRecord(seqBases, pulseCalls, input, cigar, Strand::REVERSE);
+
+ EXPECT_EQ(allPulses.ReverseGenomic(), fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::ALL));
+ EXPECT_EQ(allPulses.ReverseNative(), fetchData(b, Orientation::NATIVE, false, false, PulseBehavior::ALL));
+ // no align/clipping operations available on ALL pulses
+
+ EXPECT_EQ(basecallsOnly.ReverseGenomic(), fetchData(b, Orientation::GENOMIC, false, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ReverseNative(), fetchData(b, Orientation::NATIVE, false, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ReverseGenomicAligned(), fetchData(b, Orientation::GENOMIC, true, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ReverseNativeAligned(), fetchData(b, Orientation::NATIVE, true, false, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ReverseGenomicAlignedClipped(), fetchData(b, Orientation::GENOMIC, true, true, PulseBehavior::BASECALLS_ONLY));
+ EXPECT_EQ(basecallsOnly.ReverseNativeAlignedClipped(), fetchData(b, Orientation::NATIVE, true, true, PulseBehavior::BASECALLS_ONLY));
+ }
+}
+
+// Runs CheckAlignAndClip over the per-base string tags of a record built by
+// MakeCigaredBaseRecord: first DeletionTag, then SubstitutionTag. Both are
+// compared against the same expectations `e`.
+static
+void CheckBaseTagsClippedAndAligned(const string& cigar,
+                                    const string& input,
+                                    const ExpectedResult<string>& e)
+{
+    // record factory: aligned record carrying DeletionTag + SubstitutionTag
+    const auto buildRecord = [](const string& tagBases,
+                                const string& cigarText,
+                                const Strand alignedStrand)
+    {
+        return MakeCigaredBaseRecord(tagBases, cigarText, alignedStrand);
+    };
+
+    const auto fetchDeletionTag = [](const BamRecord& record,
+                                     Orientation orient,
+                                     bool alignedView,
+                                     bool dropSoftClips)
+    {
+        return record.DeletionTag(orient, alignedView, dropSoftClips);
+    };
+
+    const auto fetchSubstitutionTag = [](const BamRecord& record,
+                                         Orientation orient,
+                                         bool alignedView,
+                                         bool dropSoftClips)
+    {
+        return record.SubstitutionTag(orient, alignedView, dropSoftClips);
+    };
+
+    CheckAlignAndClip(cigar, input, e, buildRecord, fetchDeletionTag);
+    CheckAlignAndClip(cigar, input, e, buildRecord, fetchSubstitutionTag);
+}
+
+// Runs CheckAlignAndClip over the frame tags of a record built by
+// MakeCigaredFrameRecord: first IPD, then PulseWidth. The fetched frame
+// objects are compared through their Data() values.
+static
+void CheckFrameTagsClippedAndAligned(const string& cigar,
+                                     const vector<uint16_t>& input,
+                                     const ExpectedResult<vector<uint16_t> >& e)
+{
+    // record factory: aligned record carrying IPD + PulseWidth
+    const auto buildRecord = [](const vector<uint16_t>& frameValues,
+                                const string& cigarText,
+                                const Strand alignedStrand)
+    {
+        return tests::MakeCigaredFrameRecord(frameValues, cigarText, alignedStrand);
+    };
+
+    const auto fetchIpd = [](const BamRecord& record,
+                             Orientation orient,
+                             bool alignedView,
+                             bool dropSoftClips)
+    {
+        return record.IPD(orient, alignedView, dropSoftClips).Data();
+    };
+
+    const auto fetchPulseWidth = [](const BamRecord& record,
+                                    Orientation orient,
+                                    bool alignedView,
+                                    bool dropSoftClips)
+    {
+        return record.PulseWidth(orient, alignedView, dropSoftClips).Data();
+    };
+
+    CheckAlignAndClip(cigar, input, e, buildRecord, fetchIpd);
+    CheckAlignAndClip(cigar, input, e, buildRecord, fetchPulseWidth);
+}
+
+// Runs CheckAlignAndClip over the quality-value tags of a record built by
+// MakeCigaredQualRecord, in this order: DeletionQV, InsertionQV, MergeQV,
+// SubstitutionQV. Each fetched value is compared through its Fastq() string.
+static
+void CheckQualityTagsClippedAndAligned(const string& cigar,
+                                       const string& input,
+                                       const ExpectedResult<string>& e)
+{
+    // record factory: aligned record carrying all four QV tags
+    const auto buildRecord = [](const string& qualText,
+                                const string& cigarText,
+                                const Strand alignedStrand)
+    {
+        return tests::MakeCigaredQualRecord(qualText, cigarText, alignedStrand);
+    };
+
+    const auto fetchDeletionQV = [](const BamRecord& record,
+                                    Orientation orient,
+                                    bool alignedView,
+                                    bool dropSoftClips)
+    {
+        return record.DeletionQV(orient, alignedView, dropSoftClips).Fastq();
+    };
+
+    const auto fetchInsertionQV = [](const BamRecord& record,
+                                     Orientation orient,
+                                     bool alignedView,
+                                     bool dropSoftClips)
+    {
+        return record.InsertionQV(orient, alignedView, dropSoftClips).Fastq();
+    };
+
+    const auto fetchMergeQV = [](const BamRecord& record,
+                                 Orientation orient,
+                                 bool alignedView,
+                                 bool dropSoftClips)
+    {
+        return record.MergeQV(orient, alignedView, dropSoftClips).Fastq();
+    };
+
+    const auto fetchSubstitutionQV = [](const BamRecord& record,
+                                        Orientation orient,
+                                        bool alignedView,
+                                        bool dropSoftClips)
+    {
+        return record.SubstitutionQV(orient, alignedView, dropSoftClips).Fastq();
+    };
+
+    CheckAlignAndClip(cigar, input, e, buildRecord, fetchDeletionQV);
+    CheckAlignAndClip(cigar, input, e, buildRecord, fetchInsertionQV);
+    CheckAlignAndClip(cigar, input, e, buildRecord, fetchMergeQV);
+    CheckAlignAndClip(cigar, input, e, buildRecord, fetchSubstitutionQV);
+}
+
+// Runs CheckAlignAndClip over the record's QUAL field. The record is built
+// with a dummy all-'N' sequence, then its sequence/qualities are reset so
+// that the qualities under test are stored.
+static
+void CheckQualitiesClippedAndAligned(const string& cigar,
+                                     const string& input,
+                                     const ExpectedResult<string>& e)
+{
+    // record factory: aligned record w/ dummy SEQ and the QUALs under test
+    const auto buildRecord = [](const string& qualText,
+                                const string& cigarText,
+                                const Strand alignedStrand)
+    {
+        const string dummySeq = string(qualText.size(), 'N');
+        auto result = tests::MakeCigaredRecord(dummySeq, cigarText, alignedStrand);
+        result.Impl().SetSequenceAndQualities(dummySeq, qualText);
+        return result;
+    };
+
+    const auto fetchQualities = [](const BamRecord& record,
+                                   Orientation orient,
+                                   bool alignedView,
+                                   bool dropSoftClips)
+    {
+        return record.Qualities(orient, alignedView, dropSoftClips).Fastq();
+    };
+
+    CheckAlignAndClip(cigar, input, e, buildRecord, fetchQualities);
+}
+
+// Runs CheckAlignAndClip over the record's SEQ field, using a record built
+// by MakeCigaredRecord from the sequence under test.
+static
+void CheckSequenceClippedAndAligned(const string& cigar,
+                                    const string& input,
+                                    const ExpectedResult<string>& e)
+{
+    // record factory: aligned record w/ the SEQ under test
+    const auto buildRecord = [](const string& seqText,
+                                const string& cigarText,
+                                const Strand alignedStrand)
+    {
+        return tests::MakeCigaredRecord(seqText, cigarText, alignedStrand);
+    };
+
+    const auto fetchSequence = [](const BamRecord& record,
+                                  Orientation orient,
+                                  bool alignedView,
+                                  bool dropSoftClips)
+    {
+        return record.Sequence(orient, alignedView, dropSoftClips);
+    };
+
+    CheckAlignAndClip(cigar, input, e, buildRecord, fetchSequence);
+}
+
+// Runs CheckPulseDataAlignAndClip over the pulse base tags of a record built
+// by MakeCigaredPulseBaseRecord: first AltLabelTag, then PulseCall.
+// NOTE(review): PulseCall is compared against the same expectations as
+// AltLabelTag, which presumably requires callers to pass matching
+// pulseCalls/pulseBases data - confirm against the call sites.
+static
+void CheckPulseBaseTags(const string& cigar,
+                        const string& seqBases,
+                        const string& pulseCalls,
+                        const string& pulseBases,
+                        const ExpectedResult<string>& allPulses,
+                        const ExpectedResult<string>& basecallsOnly)
+{
+    // record factory: aligned record carrying PulseCall + AltLabelTag
+    const auto buildRecord = [](const string& bases,
+                                const string& calls,
+                                const string& altLabels,
+                                const string& cigarText,
+                                const Strand alignedStrand)
+    {
+        return MakeCigaredPulseBaseRecord(bases, calls, altLabels, cigarText, alignedStrand);
+    };
+
+    const auto fetchAltLabelTag = [](const BamRecord& record,
+                                     Orientation orient,
+                                     bool alignedView,
+                                     bool dropSoftClips,
+                                     PulseBehavior behavior)
+    {
+        return record.AltLabelTag(orient, alignedView, dropSoftClips, behavior);
+    };
+
+    const auto fetchPulseCall = [](const BamRecord& record,
+                                   Orientation orient,
+                                   bool alignedView,
+                                   bool dropSoftClips,
+                                   PulseBehavior behavior)
+    {
+        return record.PulseCall(orient, alignedView, dropSoftClips, behavior);
+    };
+
+    CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseBases,
+                               allPulses, basecallsOnly, buildRecord, fetchAltLabelTag);
+    CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseBases,
+                               allPulses, basecallsOnly, buildRecord, fetchPulseCall);
+}
+
+// Runs CheckPulseDataAlignAndClip over the pulse frame tags of a record built
+// by MakeCigaredPulseFrameRecord: first PrePulseFrames, then PulseCallWidth.
+// The fetched frame objects are compared through their Data() values.
+static
+void CheckPulseFrameTags(const string& cigar,
+                         const string& seqBases,
+                         const string& pulseCalls,
+                         const vector<uint16_t>& pulseFrames,
+                         const ExpectedResult<vector<uint16_t>>& allPulses,
+                         const ExpectedResult<vector<uint16_t>>& basecallsOnly)
+{
+    // record factory: aligned record carrying PrePulseFrames + PulseCallWidth
+    const auto buildRecord = [](const string& bases,
+                                const string& calls,
+                                const vector<uint16_t>& frameValues,
+                                const string& cigarText,
+                                const Strand alignedStrand)
+    {
+        return MakeCigaredPulseFrameRecord(bases, calls, frameValues, cigarText, alignedStrand);
+    };
+
+    const auto fetchPrePulseFrames = [](const BamRecord& record,
+                                        Orientation orient,
+                                        bool alignedView,
+                                        bool dropSoftClips,
+                                        PulseBehavior behavior)
+    {
+        return record.PrePulseFrames(orient, alignedView, dropSoftClips, behavior).Data();
+    };
+
+    const auto fetchPulseCallWidth = [](const BamRecord& record,
+                                        Orientation orient,
+                                        bool alignedView,
+                                        bool dropSoftClips,
+                                        PulseBehavior behavior)
+    {
+        return record.PulseCallWidth(orient, alignedView, dropSoftClips, behavior).Data();
+    };
+
+    CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseFrames,
+                               allPulses, basecallsOnly, buildRecord, fetchPrePulseFrames);
+    CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseFrames,
+                               allPulses, basecallsOnly, buildRecord, fetchPulseCallWidth);
+}
+
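+/*
+ NOTE(review): the pulse tags listed below hold photon values (vector<float>).
+ No Check* helper in this part of the file appears to cover them yet.
+
+ { BamRecordTag::PKMEAN, {"pa", true} }, photons (vector<float>)
+ { BamRecordTag::PKMEAN_2, {"ps", true} }, photons
+ { BamRecordTag::PKMID, {"pm", true} }, photons
+ { BamRecordTag::PKMID_2, {"pi", true} }, photons
+*/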
+
+// Runs CheckPulseDataAlignAndClip over the pulse quality tags of a record
+// built by MakeCigaredPulseQualRecord, in this order: AltLabelQV, LabelQV,
+// PulseMergeQV. Each fetched value is compared through its Fastq() string.
+static
+void CheckPulseQualityTags(const string& cigar,
+                           const string& seqBases,
+                           const string& pulseCalls,
+                           const string& pulseQuals,
+                           const ExpectedResult<string>& allPulses,
+                           const ExpectedResult<string>& basecallsOnly)
+{
+    // record factory: aligned record carrying the three pulse QV tags
+    const auto buildRecord = [](const string& bases,
+                                const string& calls,
+                                const string& qualText,
+                                const string& cigarText,
+                                const Strand alignedStrand)
+    {
+        return MakeCigaredPulseQualRecord(bases, calls, qualText, cigarText, alignedStrand);
+    };
+
+    const auto fetchAltLabelQV = [](const BamRecord& record,
+                                    Orientation orient,
+                                    bool alignedView,
+                                    bool dropSoftClips,
+                                    PulseBehavior behavior)
+    {
+        return record.AltLabelQV(orient, alignedView, dropSoftClips, behavior).Fastq();
+    };
+
+    const auto fetchLabelQV = [](const BamRecord& record,
+                                 Orientation orient,
+                                 bool alignedView,
+                                 bool dropSoftClips,
+                                 PulseBehavior behavior)
+    {
+        return record.LabelQV(orient, alignedView, dropSoftClips, behavior).Fastq();
+    };
+
+    const auto fetchPulseMergeQV = [](const BamRecord& record,
+                                      Orientation orient,
+                                      bool alignedView,
+                                      bool dropSoftClips,
+                                      PulseBehavior behavior)
+    {
+        return record.PulseMergeQV(orient, alignedView, dropSoftClips, behavior).Fastq();
+    };
+
+    CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseQuals,
+                               allPulses, basecallsOnly, buildRecord, fetchAltLabelQV);
+    CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseQuals,
+                               allPulses, basecallsOnly, buildRecord, fetchLabelQV);
+    CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, pulseQuals,
+                               allPulses, basecallsOnly, buildRecord, fetchPulseMergeQV);
+}
+
+// Runs CheckPulseDataAlignAndClip over the StartFrame tag of a record built
+// by MakeCigaredPulseUIntRecord.
+static
+void CheckPulseUIntTags(const string& cigar,
+                        const string& seqBases,
+                        const string& pulseCalls,
+                        const vector<uint32_t>& startFrames,
+                        const ExpectedResult<vector<uint32_t>>& allPulses,
+                        const ExpectedResult<vector<uint32_t>>& basecallsOnly)
+{
+    // record factory: aligned record carrying StartFrame
+    const auto buildRecord = [](const string& bases,
+                                const string& calls,
+                                const vector<uint32_t>& frameStarts,
+                                const string& cigarText,
+                                const Strand alignedStrand)
+    {
+        return MakeCigaredPulseUIntRecord(bases, calls, frameStarts, cigarText, alignedStrand);
+    };
+
+    const auto fetchStartFrame = [](const BamRecord& record,
+                                    Orientation orient,
+                                    bool alignedView,
+                                    bool dropSoftClips,
+                                    PulseBehavior behavior)
+    {
+        return record.StartFrame(orient, alignedView, dropSoftClips, behavior);
+    };
+
+    CheckPulseDataAlignAndClip(cigar, seqBases, pulseCalls, startFrames,
+                               allPulses, basecallsOnly, buildRecord, fetchStartFrame);
+}
+
+
+
+} // namespace tests
+
+// Checks the state of a default-constructed BamRecord: core field defaults,
+// flag queries, empty variable-length data, absence of PacBio tags (with the
+// corresponding accessors throwing), and consistency of the raw data lengths.
+TEST(BamRecordTest, DefaultValues)
+{
+ BamRecord bam;
+ const string emptyString;
+
+ // BamRecordImpl data
+ EXPECT_EQ(0, bam.impl_.Bin());
+ EXPECT_EQ(BamRecordImpl::UNMAPPED, bam.impl_.Flag()); // forced init unmapped
+ EXPECT_EQ(0, bam.impl_.InsertSize());
+ EXPECT_EQ(255, bam.impl_.MapQuality());
+ EXPECT_EQ(-1, bam.impl_.MateReferenceId());
+ EXPECT_EQ(-1, bam.impl_.MatePosition());
+ EXPECT_EQ(-1, bam.impl_.Position());
+ EXPECT_EQ(-1, bam.impl_.ReferenceId());
+ EXPECT_EQ(0, bam.impl_.Tags().size());
+
+ // individual flag queries
+ EXPECT_FALSE(bam.impl_.IsDuplicate());
+ EXPECT_FALSE(bam.impl_.IsFailedQC());
+ EXPECT_FALSE(bam.impl_.IsFirstMate());
+ EXPECT_FALSE(bam.impl_.IsMapped()); // forced init unmapped
+ EXPECT_TRUE(bam.impl_.IsMateMapped());
+ EXPECT_FALSE(bam.impl_.IsMateReverseStrand());
+ EXPECT_FALSE(bam.impl_.IsPaired());
+ EXPECT_TRUE(bam.impl_.IsPrimaryAlignment());
+ EXPECT_FALSE(bam.impl_.IsProperPair());
+ EXPECT_FALSE(bam.impl_.IsReverseStrand());
+ EXPECT_FALSE(bam.impl_.IsSecondMate());
+ EXPECT_FALSE(bam.impl_.IsSupplementaryAlignment());
+
+ // variable-length data is empty
+ EXPECT_EQ(emptyString, bam.impl_.Name());
+ EXPECT_EQ(emptyString, bam.impl_.CigarData().ToStdString());
+ EXPECT_EQ(emptyString, bam.impl_.Sequence());
+ EXPECT_EQ(emptyString, bam.impl_.Qualities().Fastq());
+
+ // PacBio data
+ EXPECT_EQ(-1, bam.AlignedStart());
+ EXPECT_EQ(-1, bam.AlignedEnd());
+
+ EXPECT_FALSE(bam.HasHoleNumber());
+ EXPECT_FALSE(bam.HasNumPasses());
+ EXPECT_FALSE(bam.HasQueryEnd());
+ EXPECT_FALSE(bam.HasQueryStart());
+ EXPECT_FALSE(bam.HasReadAccuracy());
+
+ // missing tags throw on access, except QueryStart/QueryEnd which yield 0
+ EXPECT_THROW(bam.HoleNumber(), std::exception);
+ EXPECT_THROW(bam.NumPasses(), std::exception);
+ EXPECT_EQ(Position(0), bam.QueryEnd());
+ EXPECT_EQ(Position(0), bam.QueryStart());
+ EXPECT_THROW(bam.ReadAccuracy(), std::exception);
+
+ EXPECT_FALSE(bam.HasDeletionQV());
+ EXPECT_FALSE(bam.HasDeletionTag());
+ EXPECT_FALSE(bam.HasInsertionQV());
+ EXPECT_FALSE(bam.HasMergeQV());
+ EXPECT_FALSE(bam.HasSubstitutionQV());
+ EXPECT_FALSE(bam.HasSubstitutionTag());
+
+ EXPECT_THROW(bam.DeletionQV(), std::exception);
+ EXPECT_THROW(bam.DeletionTag(), std::exception);
+ EXPECT_THROW(bam.InsertionQV(), std::exception);
+ EXPECT_THROW(bam.MergeQV(), std::exception);
+ EXPECT_THROW(bam.SubstitutionQV(), std::exception);
+ EXPECT_THROW(bam.SubstitutionTag(), std::exception);
+
+ // raw data
+ tests::CheckRawData(bam);
+}
+
+// Checks that a BamRecord built from a BamRecordImpl -- by copy construction,
+// copy assignment, move construction and move assignment -- carries the same
+// core fields and tags as the tests::CreateBamImpl() fixture, and that the
+// copies are independent of the source object.
+TEST(BamRecordTest, FromBamRecordImpl)
+{
+ // check generic data
+ BamRecordImpl genericBam = tests::CreateBamImpl();
+
+ EXPECT_EQ(42, genericBam.Bin());
+ EXPECT_EQ(42, genericBam.Flag());
+ EXPECT_EQ(42, genericBam.InsertSize());
+ EXPECT_EQ(42, genericBam.MapQuality());
+ EXPECT_EQ(42, genericBam.MateReferenceId());
+ EXPECT_EQ(42, genericBam.MatePosition());
+ EXPECT_EQ(42, genericBam.Position());
+ EXPECT_EQ(42, genericBam.ReferenceId());
+
+ const TagCollection& genericTags = genericBam.Tags();
+ EXPECT_TRUE(genericTags.at("HX").HasModifier(TagModifier::HEX_STRING));
+ EXPECT_EQ(string("1abc75"), genericTags.at("HX").ToString());
+ EXPECT_EQ(static_cast<int32_t>(-42), genericTags.at("XY").ToInt32());
+ EXPECT_EQ(vector<uint8_t>({34, 5, 125}), genericTags.at("CA").ToUInt8Array());
+
+ // copy ctor
+ BamRecord bam1(genericBam);
+
+ EXPECT_EQ(42, bam1.impl_.Bin());
+ EXPECT_EQ(42, bam1.impl_.Flag());
+ EXPECT_EQ(42, bam1.impl_.InsertSize());
+ EXPECT_EQ(42, bam1.impl_.MapQuality());
+ EXPECT_EQ(42, bam1.impl_.MateReferenceId());
+ EXPECT_EQ(42, bam1.impl_.MatePosition());
+ EXPECT_EQ(42, bam1.impl_.Position());
+ EXPECT_EQ(42, bam1.impl_.ReferenceId());
+
+ const TagCollection& bam1Tags = bam1.impl_.Tags();
+ EXPECT_TRUE(bam1Tags.at("HX").HasModifier(TagModifier::HEX_STRING));
+ EXPECT_EQ(string("1abc75"), bam1Tags.at("HX").ToString());
+ EXPECT_EQ(static_cast<int32_t>(-42), bam1Tags.at("XY").ToInt32());
+ EXPECT_EQ(vector<uint8_t>({34, 5, 125}), bam1Tags.at("CA").ToUInt8Array());
+
+ // copy assignment
+ BamRecord bam2;
+ bam2 = genericBam;
+
+ EXPECT_EQ(42, bam2.impl_.Bin());
+ EXPECT_EQ(42, bam2.impl_.Flag());
+ EXPECT_EQ(42, bam2.impl_.InsertSize());
+ EXPECT_EQ(42, bam2.impl_.MapQuality());
+ EXPECT_EQ(42, bam2.impl_.MateReferenceId());
+ EXPECT_EQ(42, bam2.impl_.MatePosition());
+ EXPECT_EQ(42, bam2.impl_.Position());
+ EXPECT_EQ(42, bam2.impl_.ReferenceId());
+
+ const TagCollection& bam2Tags = bam2.impl_.Tags();
+ EXPECT_TRUE(bam2Tags.at("HX").HasModifier(TagModifier::HEX_STRING));
+ EXPECT_EQ(string("1abc75"), bam2Tags.at("HX").ToString());
+ EXPECT_EQ(static_cast<int32_t>(-42), bam2Tags.at("XY").ToInt32());
+ EXPECT_EQ(vector<uint8_t>({34, 5, 125}), bam2Tags.at("CA").ToUInt8Array());
+
+ // change genericBam, make sure we deep copied bam1 & bam2
+ genericBam.Position(2000);
+
+ EXPECT_EQ(2000, genericBam.Position());
+ EXPECT_EQ(42, bam1.impl_.Position());
+ EXPECT_EQ(42, bam2.impl_.Position());
+
+ // move ctor
+ // move() is applied to a temporary here; the pragmas below silence
+ // clang's -Wpessimizing-move for that call
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpessimizing-move"
+#endif
+ BamRecord bam3(move(tests::CreateBamImpl()));
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+ EXPECT_EQ(42, bam3.impl_.Bin());
+ EXPECT_EQ(42, bam3.impl_.Flag());
+ EXPECT_EQ(42, bam3.impl_.InsertSize());
+ EXPECT_EQ(42, bam3.impl_.MapQuality());
+ EXPECT_EQ(42, bam3.impl_.MateReferenceId());
+ EXPECT_EQ(42, bam3.impl_.MatePosition());
+ EXPECT_EQ(42, bam3.impl_.Position());
+ EXPECT_EQ(42, bam3.impl_.ReferenceId());
+
+ const TagCollection& bam3Tags = bam3.impl_.Tags();
+ EXPECT_TRUE(bam3Tags.at("HX").HasModifier(TagModifier::HEX_STRING));
+ EXPECT_EQ(string("1abc75"), bam3Tags.at("HX").ToString());
+ EXPECT_EQ(static_cast<int32_t>(-42), bam3Tags.at("XY").ToInt32());
+ EXPECT_EQ(vector<uint8_t>({34, 5, 125}), bam3Tags.at("CA").ToUInt8Array());
+
+ // move assignment
+ BamRecord bam4;
+ // same -Wpessimizing-move suppression as for the move ctor above
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpessimizing-move"
+#endif
+ bam4 = move(tests::CreateBamImpl());
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+ EXPECT_EQ(42, bam4.impl_.Bin());
+ EXPECT_EQ(42, bam4.impl_.Flag());
+ EXPECT_EQ(42, bam4.impl_.InsertSize());
+ EXPECT_EQ(42, bam4.impl_.MapQuality());
+ EXPECT_EQ(42, bam4.impl_.MateReferenceId());
+ EXPECT_EQ(42, bam4.impl_.MatePosition());
+ EXPECT_EQ(42, bam4.impl_.Position());
+ EXPECT_EQ(42, bam4.impl_.ReferenceId());
+
+ const TagCollection& bam4Tags = bam4.impl_.Tags();
+ EXPECT_TRUE(bam4Tags.at("HX").HasModifier(TagModifier::HEX_STRING));
+ EXPECT_EQ(string("1abc75"), bam4Tags.at("HX").ToString());
+ EXPECT_EQ(static_cast<int32_t>(-42), bam4Tags.at("XY").ToInt32());
+ EXPECT_EQ(vector<uint8_t>({34, 5, 125}), bam4Tags.at("CA").ToUInt8Array());
+}
+
+// Verifies that assigning a BamRecord to itself leaves its core fields, its
+// tags and its raw data intact.
+TEST(BamRecordTest, SelfAssignmentTolerated)
+{
+ // give every core field a recognizable value
+ BamRecord bam1;
+ bam1.impl_.Bin(42);
+ bam1.impl_.Flag(42);
+ bam1.impl_.InsertSize(42);
+ bam1.impl_.MapQuality(42);
+ bam1.impl_.MatePosition(42);
+ bam1.impl_.MateReferenceId(42);
+ bam1.impl_.Position(42);
+ bam1.impl_.ReferenceId(42);
+
+ // attach three tags: a hex-string, a byte array and a signed integer
+ TagCollection tags;
+ tags["HX"] = string("1abc75");
+ tags["HX"].Modifier(TagModifier::HEX_STRING);
+ tags["CA"] = vector<uint8_t>({34, 5, 125});
+ tags["XY"] = static_cast<int32_t>(-42);
+ bam1.impl_.Tags(tags);
+
+ // Self-assign through an explicit reference cast. A bare `bam1 = bam1;`
+ // triggers clang's -Wself-assign-overloaded; the cast marks the
+ // self-assignment as intentional while still selecting the same
+ // copy-assignment operator.
+ bam1 = static_cast<BamRecord&>(bam1);
+
+ // core fields must be unchanged
+ EXPECT_EQ(42, bam1.impl_.Bin());
+ EXPECT_EQ(42, bam1.impl_.Flag());
+ EXPECT_EQ(42, bam1.impl_.InsertSize());
+ EXPECT_EQ(42, bam1.impl_.MapQuality());
+ EXPECT_EQ(42, bam1.impl_.MateReferenceId());
+ EXPECT_EQ(42, bam1.impl_.MatePosition());
+ EXPECT_EQ(42, bam1.impl_.Position());
+ EXPECT_EQ(42, bam1.impl_.ReferenceId());
+
+ // tags must be unchanged, including the hex-string modifier
+ const TagCollection& fetchedTags1 = bam1.impl_.Tags();
+ EXPECT_TRUE(fetchedTags1.at("HX").HasModifier(TagModifier::HEX_STRING));
+ EXPECT_EQ(string("1abc75"), fetchedTags1.at("HX").ToString());
+ EXPECT_EQ(static_cast<int32_t>(-42), fetchedTags1.at("XY").ToInt32());
+ EXPECT_EQ(vector<uint8_t>({34, 5, 125}), fetchedTags1.at("CA").ToUInt8Array());
+
+ // NOTE(review): presumably validates the record's underlying raw data -- helper is defined elsewhere
+ tests::CheckRawData(bam1);
+}
+
+TEST(BamRecordTest, CoreSetters)
+{
+ // Start from the shared fixture record and alias its implementation object;
+ // the assertions below expect every core field to read back as 42.
+ BamRecord bam = tests::CreateBam();
+ auto& core = bam.impl_;
+
+ // Inputs for the PacBio-specific setters (those calls are disabled for now).
+ QualityValues testQVs;
+ testQVs.push_back(0);
+ testQVs.push_back(1);
+
+ const string testTags = "GATTACA";
+
+ // PacBio setters -- currently disabled
+// bam.AlignedStart(42);
+// bam.AlignedEnd(42);
+// bam.DeletionQVs(testQVs);
+// bam.DeletionTags(testTags);
+// bam.HoleNumber(42);
+// bam.InsertionQVs(testQVs);
+// bam.MergeQVs(testQVs);
+// bam.NumPasses(42);
+// bam.QueryEnd(42);
+// bam.QueryStart(42);
+// bam.ReadAccuracy(42);
+// bam.ReferenceEnd(42);
+// bam.ReferenceStart(42);
+// bam.SubstitutionQVs(testQVs);
+// bam.SubstitutionTags(testTags);
+
+ // generic (core) fields
+ EXPECT_EQ(42, core.Bin());
+ EXPECT_EQ(42, core.Flag());
+ EXPECT_EQ(42, core.InsertSize());
+ EXPECT_EQ(42, core.MapQuality());
+ EXPECT_EQ(42, core.MateReferenceId());
+ EXPECT_EQ(42, core.MatePosition());
+ EXPECT_EQ(42, core.Position());
+ EXPECT_EQ(42, core.ReferenceId());
+
+ // PacBio getters -- currently disabled
+// EXPECT_EQ(42, bam.AlignedStart());
+// EXPECT_EQ(42, bam.AlignedEnd());
+// EXPECT_EQ(testQVs, bam.DeletionQVs());
+// EXPECT_EQ(testTags, bam.DeletionTags());
+// EXPECT_EQ(42, bam.HoleNumber());
+// EXPECT_EQ(testQVs, bam.InsertionQVs());
+// EXPECT_EQ(testQVs, bam.MergeQVs());
+
+// EXPECT_EQ(42, bam.NumPasses());
+// EXPECT_EQ(42, bam.QueryEnd());
+// EXPECT_EQ(42, bam.QueryStart());
+// EXPECT_EQ(42, bam.ReadAccuracy());
+// EXPECT_EQ(42, bam.ReferenceEnd());
+// EXPECT_EQ(42, bam.ReferenceStart());
+// EXPECT_EQ(testQVs, bam.SubstitutionQVs());
+// EXPECT_EQ(testTags, bam.SubstitutionTags());
+
+ // expected tag payloads
+ const string hexText("1abc75");
+ const int32_t xyValue = -42;
+ const vector<uint8_t> caBytes{34, 5, 125};
+
+ // tags: HX must still be flagged as a hex string
+ const TagCollection& storedTags = core.Tags();
+ EXPECT_TRUE(storedTags.at("HX").HasModifier(TagModifier::HEX_STRING));
+ EXPECT_EQ(hexText, storedTags.at("HX").ToString());
+ EXPECT_EQ(xyValue, storedTags.at("XY").ToInt32());
+ EXPECT_EQ(caBytes, storedTags.at("CA").ToUInt8Array());
+
+ tests::CheckRawData(bam);
+}
+
+TEST(BamRecordTest, SequenceOrientation) // Sequence orientation for an all-match CIGAR on the forward and reverse strand.
+{ // Expected table: six forward-strand rows, then six reverse-strand rows (see per-row labels).
+ {
+ SCOPED_TRACE("Simple CIGAR Sequence"); // trace label attached to failures reported from inside the helper
+ tests::CheckSequenceClippedAndAligned(
+ "13=", // CIGAR
+ "ATATATCCCGGCG", // input
+ {
+ "ATATATCCCGGCG", // forward strand, genomic
+ "ATATATCCCGGCG", // forward strand, native
+ "ATATATCCCGGCG", // forward strand, genomic, aligned
+ "ATATATCCCGGCG", // forward strand, native, aligned
+ "ATATATCCCGGCG", // forward strand, genomic, aligned + clipped
+ "ATATATCCCGGCG", // forward strand, native, aligned + clipped
+ "ATATATCCCGGCG", // reverse strand, genomic
+ "CGCCGGGATATAT", // reverse strand, native (reverse complement of the input)
+ "ATATATCCCGGCG", // reverse strand, genomic, aligned
+ "CGCCGGGATATAT", // reverse strand, native, aligned
+ "ATATATCCCGGCG", // reverse strand, genomic, aligned + clipped
+ "CGCCGGGATATAT" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+}
+
+TEST(BamRecordTest, QualitiesOrientation) // Quality-string orientation for an all-match CIGAR on the forward and reverse strand.
+{ // Expected table: six forward-strand rows, then six reverse-strand rows (see per-row labels).
+ {
+ SCOPED_TRACE("Simple CIGAR Qualities"); // trace label attached to failures reported from inside the helper
+ tests::CheckQualitiesClippedAndAligned(
+ "13=", // CIGAR
+ "?]?]?]?]?]?]*", // input
+ {
+ "?]?]?]?]?]?]*", // forward strand, genomic
+ "?]?]?]?]?]?]*", // forward strand, native
+ "?]?]?]?]?]?]*", // forward strand, genomic, aligned
+ "?]?]?]?]?]?]*", // forward strand, native, aligned
+ "?]?]?]?]?]?]*", // forward strand, genomic, aligned + clipped
+ "?]?]?]?]?]?]*", // forward strand, native, aligned + clipped
+ "?]?]?]?]?]?]*", // reverse strand, genomic
+ "*]?]?]?]?]?]?", // reverse strand, native (the input reversed)
+ "?]?]?]?]?]?]*", // reverse strand, genomic, aligned
+ "*]?]?]?]?]?]?", // reverse strand, native, aligned
+ "?]?]?]?]?]?]*", // reverse strand, genomic, aligned + clipped
+ "*]?]?]?]?]?]?" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+}
+
+TEST(BamRecordTest, SequenceTagsOrientation) // Base-tag orientation for an all-match CIGAR on the forward and reverse strand.
+{ // Here the reverse-strand "native" rows equal the input and the "genomic" rows are expected to differ.
+ {
+ SCOPED_TRACE("Simple CIGAR Base Tags"); // trace label attached to failures reported from inside the helper
+ tests::CheckBaseTagsClippedAndAligned(
+ "13=", // CIGAR
+ "ATATATCCCGGCG", // input
+ {
+ "ATATATCCCGGCG", // forward strand, genomic
+ "ATATATCCCGGCG", // forward strand, native
+ "ATATATCCCGGCG", // forward strand, genomic, aligned
+ "ATATATCCCGGCG", // forward strand, native, aligned
+ "ATATATCCCGGCG", // forward strand, genomic, aligned, clipped
+ "ATATATCCCGGCG", // forward strand, native, aligned, clipped
+ "CGCCGGGATATAT", // reverse strand, genomic (reverse complement of the input)
+ "ATATATCCCGGCG", // reverse strand, native
+ "CGCCGGGATATAT", // reverse strand, genomic, aligned
+ "ATATATCCCGGCG", // reverse strand, native, aligned
+ "CGCCGGGATATAT", // reverse strand, genomic, aligned, clipped
+ "ATATATCCCGGCG" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+}
+
+TEST(BamRecordTest, FrameTagsOrientation) // Frame-tag orientation for an all-match CIGAR on the forward and reverse strand.
+{ // Here the reverse-strand "native" rows equal the input and the "genomic" rows are expected to differ.
+ {
+ SCOPED_TRACE("Simple CIGAR Frames"); // trace label attached to failures reported from inside the helper
+ tests::CheckFrameTagsClippedAndAligned(
+ "5=", // CIGAR
+ { 0, 1, 2, 3, 4 }, // input
+ {
+ { 0, 1, 2, 3, 4 }, // forward strand, genomic
+ { 0, 1, 2, 3, 4 }, // forward strand, native
+ { 0, 1, 2, 3, 4 }, // forward strand, genomic, aligned
+ { 0, 1, 2, 3, 4 }, // forward strand, native, aligned
+ { 0, 1, 2, 3, 4 }, // forward strand, genomic, aligned, clipped
+ { 0, 1, 2, 3, 4 }, // forward strand, native, aligned, clipped
+ { 4, 3, 2, 1, 0 }, // reverse strand, genomic (the input reversed)
+ { 0, 1, 2, 3, 4 }, // reverse strand, native
+ { 4, 3, 2, 1, 0 }, // reverse strand, genomic, aligned
+ { 0, 1, 2, 3, 4 }, // reverse strand, native, aligned
+ { 4, 3, 2, 1, 0 }, // reverse strand, genomic, aligned, clipped
+ { 0, 1, 2, 3, 4 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+}
+
+TEST(BamRecordTest, QualityTagsOrientation) // Quality-tag orientation for an all-match CIGAR on the forward and reverse strand.
+{ // Here the reverse-strand "native" rows equal the input and the "genomic" rows are expected to differ.
+ {
+ SCOPED_TRACE("Simple CIGAR Quality Tags"); // trace label attached to failures reported from inside the helper
+ tests::CheckQualityTagsClippedAndAligned(
+ "13=", // CIGAR
+ "?]?]?]?]?]?]*", // input
+ {
+ "?]?]?]?]?]?]*", // forward strand, genomic
+ "?]?]?]?]?]?]*", // forward strand, native
+ "?]?]?]?]?]?]*", // forward strand, genomic, aligned
+ "?]?]?]?]?]?]*", // forward strand, native, aligned
+ "?]?]?]?]?]?]*", // forward strand, genomic, aligned + clipped
+ "?]?]?]?]?]?]*", // forward strand, native, aligned + clipped
+ "*]?]?]?]?]?]?", // reverse strand, genomic (the input reversed)
+ "?]?]?]?]?]?]*", // reverse strand, native
+ "*]?]?]?]?]?]?", // reverse strand, genomic, aligned
+ "?]?]?]?]?]?]*", // reverse strand, native, aligned
+ "*]?]?]?]?]?]?", // reverse strand, genomic, aligned + clipped
+ "?]?]?]?]?]?]*" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+}
+
+TEST(BamRecordTest, SequenceClippedAndAligned) // Sequence expectations for CIGARs with matches, reference skips, insertions and soft/hard clips.
+{ // Every case passes a CIGAR, an input sequence and twelve expected strings: six forward-strand rows, then six reverse-strand rows.
+ {
+ SCOPED_TRACE("CIGAR: 10="); // plain match: every forward-strand row equals the input
+ tests::CheckSequenceClippedAndAligned(
+ "10=", // CIGAR
+ "ATCCGCGGTT", // input
+ {
+ "ATCCGCGGTT", // forward strand, genomic
+ "ATCCGCGGTT", // forward strand, native
+ "ATCCGCGGTT", // forward strand, genomic, aligned
+ "ATCCGCGGTT", // forward strand, native, aligned
+ "ATCCGCGGTT", // forward strand, genomic, aligned + clipped
+ "ATCCGCGGTT", // forward strand, native, aligned + clipped
+ "ATCCGCGGTT", // reverse strand, genomic
+ "AACCGCGGAT", // reverse strand, native
+ "ATCCGCGGTT", // reverse strand, genomic, aligned
+ "AACCGCGGAT", // reverse strand, native, aligned
+ "ATCCGCGGTT", // reverse strand, genomic, aligned + clipped
+ "AACCGCGGAT" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 3=4N3="); // reference skip (N): the expected strings contain no gap characters for it
+ tests::CheckSequenceClippedAndAligned(
+ "3=4N3=", // CIGAR
+ "ACGTTT", // input
+ {
+ "ACGTTT", // forward strand, genomic
+ "ACGTTT", // forward strand, native
+ "ACGTTT", // forward strand, genomic, aligned
+ "ACGTTT", // forward strand, native, aligned
+ "ACGTTT", // forward strand, genomic, aligned + clipped
+ "ACGTTT", // forward strand, native, aligned + clipped
+ "ACGTTT", // reverse strand, genomic
+ "AAACGT", // reverse strand, native
+ "ACGTTT", // reverse strand, genomic, aligned
+ "AAACGT", // reverse strand, native, aligned
+ "ACGTTT", // reverse strand, genomic, aligned + clipped
+ "AAACGT" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 1S8=1S"); // soft clips: first and last base are dropped only in the "aligned + clipped" rows
+ tests::CheckSequenceClippedAndAligned(
+ "1S8=1S", // CIGAR
+ "ACCCGCGGTT", // input
+ {
+ "ACCCGCGGTT", // forward strand, genomic
+ "ACCCGCGGTT", // forward strand, native
+ "ACCCGCGGTT", // forward strand, genomic, aligned
+ "ACCCGCGGTT", // forward strand, native, aligned
+ "CCCGCGGT", // forward strand, genomic, aligned + clipped
+ "CCCGCGGT", // forward strand, native, aligned + clipped
+ "ACCCGCGGTT", // reverse strand, genomic
+ "AACCGCGGGT", // reverse strand, native
+ "ACCCGCGGTT", // reverse strand, genomic, aligned
+ "AACCGCGGGT", // reverse strand, native, aligned
+ "CCCGCGGT", // reverse strand, genomic, aligned + clipped
+ "ACCGCGGG" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 1H8=1H"); // hard clips: every row keeps all 8 input bases
+ tests::CheckSequenceClippedAndAligned(
+ "1H8=1H", // CIGAR
+ "ATCGCGGT", // input
+ {
+ "ATCGCGGT", // forward strand, genomic
+ "ATCGCGGT", // forward strand, native
+ "ATCGCGGT", // forward strand, genomic, aligned
+ "ATCGCGGT", // forward strand, native, aligned
+ "ATCGCGGT", // forward strand, genomic, aligned + clipped
+ "ATCGCGGT", // forward strand, native, aligned + clipped
+ "ATCGCGGT", // reverse strand, genomic
+ "ACCGCGAT", // reverse strand, native
+ "ATCGCGGT", // reverse strand, genomic, aligned
+ "ACCGCGAT", // reverse strand, native, aligned
+ "ATCGCGGT", // reverse strand, genomic, aligned + clipped
+ "ACCGCGAT" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2S6=2S"); // two soft-clipped bases per end are removed in the "aligned + clipped" rows
+ tests::CheckSequenceClippedAndAligned(
+ "2S6=2S", // CIGAR
+ "AGCCGCGGTT", // input
+ {
+ "AGCCGCGGTT", // forward strand, genomic
+ "AGCCGCGGTT", // forward strand, native
+ "AGCCGCGGTT", // forward strand, genomic, aligned
+ "AGCCGCGGTT", // forward strand, native, aligned
+ "CCGCGG", // forward strand, genomic, aligned + clipped
+ "CCGCGG", // forward strand, native, aligned + clipped
+ "AGCCGCGGTT", // reverse strand, genomic
+ "AACCGCGGCT", // reverse strand, native
+ "AGCCGCGGTT", // reverse strand, genomic, aligned
+ "AACCGCGGCT", // reverse strand, native, aligned
+ "CCGCGG", // reverse strand, genomic, aligned + clipped
+ "CCGCGG" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2S3=2I3=2S"); // inserted "NN" is kept in every row; soft clips are removed in the clipped rows
+ tests::CheckSequenceClippedAndAligned(
+ "2S3=2I3=2S", // CIGAR
+ "ATCCGNNCGGTT", // input
+ {
+ "ATCCGNNCGGTT", // forward strand, genomic
+ "ATCCGNNCGGTT", // forward strand, native
+ "ATCCGNNCGGTT", // forward strand, genomic, aligned
+ "ATCCGNNCGGTT", // forward strand, native, aligned
+ "CCGNNCGG", // forward strand, genomic, aligned + clipped
+ "CCGNNCGG", // forward strand, native, aligned + clipped
+ "ATCCGNNCGGTT", // reverse strand, genomic
+ "AACCGNNCGGAT", // reverse strand, native
+ "ATCCGNNCGGTT", // reverse strand, genomic, aligned
+ "AACCGNNCGGAT", // reverse strand, native, aligned
+ "CCGNNCGG", // reverse strand, genomic, aligned + clipped
+ "CCGNNCGG" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H6=2H"); // hard clips only: every row keeps all 6 input bases
+ tests::CheckSequenceClippedAndAligned(
+ "2H6=2H", // CIGAR
+ "CAGCGG", // input
+ {
+ "CAGCGG", // forward strand, genomic
+ "CAGCGG", // forward strand, native
+ "CAGCGG", // forward strand, genomic, aligned
+ "CAGCGG", // forward strand, native, aligned
+ "CAGCGG", // forward strand, genomic, aligned + clipped
+ "CAGCGG", // forward strand, native, aligned + clipped
+ "CAGCGG", // reverse strand, genomic
+ "CCGCTG", // reverse strand, native
+ "CAGCGG", // reverse strand, genomic, aligned
+ "CCGCTG", // reverse strand, native, aligned
+ "CAGCGG", // reverse strand, genomic, aligned + clipped
+ "CCGCTG" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+}
+
+TEST(BamRecordTest, ClippingOrientationAndAlignment) // Sequence expectations for CIGARs that combine deletions, insertions, padding and clips.
+{ // In the expected "aligned" rows, '-' stands at deleted positions and '*' at padded positions.
+ {
+ SCOPED_TRACE("CIGAR: 4=3D4="); // deletion: aligned rows carry one '-' per deleted base
+ tests::CheckSequenceClippedAndAligned(
+ "4=3D4=", // CIGAR
+ "AACCGTTA", // input
+ {
+ "AACCGTTA", // forward strand, genomic
+ "AACCGTTA", // forward strand, native
+ "AACC---GTTA", // forward strand, genomic, aligned
+ "AACC---GTTA", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned + clipped
+ "AACC---GTTA", // forward strand, native, aligned + clipped
+ "AACCGTTA", // reverse strand, genomic
+ "TAACGGTT", // reverse strand, native
+ "AACC---GTTA", // reverse strand, genomic, aligned
+ "TAAC---GGTT", // reverse strand, native, aligned
+ "AACC---GTTA", // reverse strand, genomic, aligned + clipped
+ "TAAC---GGTT" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2I2D4="); // deletions around an insertion: the reverse-strand native rows mirror the gap layout
+ tests::CheckSequenceClippedAndAligned(
+ "4=1D2I2D4=", // CIGAR
+ "ATCCTAGGTT", // input
+ {
+ "ATCCTAGGTT", // forward strand, genomic
+ "ATCCTAGGTT", // forward strand, native
+ "ATCC-TA--GGTT", // forward strand, genomic, aligned
+ "ATCC-TA--GGTT", // forward strand, native, aligned
+ "ATCC-TA--GGTT", // forward strand, genomic, aligned + clipped
+ "ATCC-TA--GGTT", // forward strand, native, aligned + clipped
+ "ATCCTAGGTT", // reverse strand, genomic
+ "AACCTAGGAT", // reverse strand, native
+ "ATCC-TA--GGTT", // reverse strand, genomic, aligned
+ "AACC--TA-GGAT", // reverse strand, native, aligned
+ "ATCC-TA--GGTT", // reverse strand, genomic, aligned + clipped
+ "AACC--TA-GGAT" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4="); // padding (P): aligned rows carry one '*' per padded position
+ tests::CheckSequenceClippedAndAligned(
+ "4=1D2P2I2P2D4=", // CIGAR
+ "ATCCTAGGTT", // input
+ {
+ "ATCCTAGGTT", // forward strand, genomic
+ "ATCCTAGGTT", // forward strand, native
+ "ATCC-**TA**--GGTT", // forward strand, genomic, aligned
+ "ATCC-**TA**--GGTT", // forward strand, native, aligned
+ "ATCC-**TA**--GGTT", // forward strand, genomic, aligned + clipped
+ "ATCC-**TA**--GGTT", // forward strand, native, aligned + clipped
+ "ATCCTAGGTT", // reverse strand, genomic
+ "AACCTAGGAT", // reverse strand, native
+ "ATCC-**TA**--GGTT", // reverse strand, genomic, aligned
+ "AACC--**TA**-GGAT", // reverse strand, native, aligned
+ "ATCC-**TA**--GGTT", // reverse strand, genomic, aligned + clipped
+ "AACC--**TA**-GGAT" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2S4=3D4=3S"); // soft clips plus deletion: the clipped rows drop 2 leading and 3 trailing bases
+ tests::CheckSequenceClippedAndAligned(
+ "2S4=3D4=3S", // CIGAR
+ "TTAACCGTTACCG", // input
+ {
+ "TTAACCGTTACCG", // forward strand, genomic
+ "TTAACCGTTACCG", // forward strand, native
+ "TTAACC---GTTACCG", // forward strand, genomic, aligned
+ "TTAACC---GTTACCG", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned + clipped
+ "AACC---GTTA", // forward strand, native, aligned + clipped
+ "TTAACCGTTACCG", // reverse strand, genomic
+ "CGGTAACGGTTAA", // reverse strand, native
+ "TTAACC---GTTACCG", // reverse strand, genomic, aligned
+ "CGGTAAC---GGTTAA", // reverse strand, native, aligned
+ "AACC---GTTA", // reverse strand, genomic, aligned + clipped
+ "TAAC---GGTT" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H4=3D4=3H"); // hard clips plus deletion: same expectations as the unclipped 4=3D4= case
+ tests::CheckSequenceClippedAndAligned(
+ "2H4=3D4=3H", // CIGAR
+ "AACCGTTA", // input
+ {
+ "AACCGTTA", // forward strand, genomic
+ "AACCGTTA", // forward strand, native
+ "AACC---GTTA", // forward strand, genomic, aligned
+ "AACC---GTTA", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned + clipped
+ "AACC---GTTA", // forward strand, native, aligned + clipped
+ "AACCGTTA", // reverse strand, genomic
+ "TAACGGTT", // reverse strand, native
+ "AACC---GTTA", // reverse strand, genomic, aligned
+ "TAAC---GGTT", // reverse strand, native, aligned
+ "AACC---GTTA", // reverse strand, genomic, aligned + clipped
+ "TAAC---GGTT" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H2S4=3D4=3S3H"); // hard + soft clips: same expectations as the 2S4=3D4=3S case
+ tests::CheckSequenceClippedAndAligned(
+ "2H2S4=3D4=3S3H", // CIGAR
+ "TTAACCGTTACCG", // input
+ {
+ "TTAACCGTTACCG", // forward strand, genomic
+ "TTAACCGTTACCG", // forward strand, native
+ "TTAACC---GTTACCG", // forward strand, genomic, aligned
+ "TTAACC---GTTACCG", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned + clipped
+ "AACC---GTTA", // forward strand, native, aligned + clipped
+ "TTAACCGTTACCG", // reverse strand, genomic
+ "CGGTAACGGTTAA", // reverse strand, native
+ "TTAACC---GTTACCG", // reverse strand, genomic, aligned
+ "CGGTAAC---GGTTAA", // reverse strand, native, aligned
+ "AACC---GTTA", // reverse strand, genomic, aligned + clipped
+ "TAAC---GGTT" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+}
+
+TEST(BamRecordTest, QualityTagsClippedAndAligned) // Quality-tag expectations for CIGARs that combine deletions, insertions, padding and clips.
+{ // In the expected "aligned" rows, '!' stands at both deleted and padded positions.
+ {
+ SCOPED_TRACE("CIGAR: 4=3D4="); // deletion: aligned rows carry one '!' per deleted base
+ tests::CheckQualityTagsClippedAndAligned(
+ "4=3D4=", // CIGAR
+ "?]?]?]?@", // input
+ {
+ "?]?]?]?@", // forward strand, genomic
+ "?]?]?]?@", // forward strand, native
+ "?]?]!!!?]?@", // forward strand, genomic, aligned
+ "?]?]!!!?]?@", // forward strand, native, aligned
+ "?]?]!!!?]?@", // forward strand, genomic, aligned + clipped
+ "?]?]!!!?]?@", // forward strand, native, aligned + clipped
+ "@?]?]?]?", // reverse strand, genomic
+ "?]?]?]?@", // reverse strand, native
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned
+ "?]?]!!!?]?@", // reverse strand, native, aligned
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned + clipped
+ "?]?]!!!?]?@" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2I2D4="); // deletions around an insertion: the inserted values "87" are kept in every row
+ tests::CheckQualityTagsClippedAndAligned(
+ "4=1D2I2D4=", // CIGAR
+ "?]?]87?]?@", // input
+ {
+ "?]?]87?]?@", // forward strand, genomic
+ "?]?]87?]?@", // forward strand, native
+ "?]?]!87!!?]?@", // forward strand, genomic, aligned
+ "?]?]!87!!?]?@", // forward strand, native, aligned
+ "?]?]!87!!?]?@", // forward strand, genomic, aligned + clipped
+ "?]?]!87!!?]?@", // forward strand, native, aligned + clipped
+ "@?]?78]?]?", // reverse strand, genomic
+ "?]?]87?]?@", // reverse strand, native
+ "@?]?!78!!]?]?", // reverse strand, genomic, aligned
+ "?]?]!!87!?]?@", // reverse strand, native, aligned
+ "@?]?!78!!]?]?", // reverse strand, genomic, aligned + clipped
+ "?]?]!!87!?]?@" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4="); // padding (P): padded positions also appear as '!' in the aligned rows
+ tests::CheckQualityTagsClippedAndAligned(
+ "4=1D2P2I2P2D4=", // CIGAR
+ "?]?]87?]?@", // input
+ {
+ "?]?]87?]?@", // forward strand, genomic
+ "?]?]87?]?@", // forward strand, native
+ "?]?]!!!87!!!!?]?@", // forward strand, genomic, aligned
+ "?]?]!!!87!!!!?]?@", // forward strand, native, aligned
+ "?]?]!!!87!!!!?]?@", // forward strand, genomic, aligned + clipped
+ "?]?]!!!87!!!!?]?@", // forward strand, native, aligned + clipped
+ "@?]?78]?]?", // reverse strand, genomic
+ "?]?]87?]?@", // reverse strand, native
+ "@?]?!!!78!!!!]?]?", // reverse strand, genomic, aligned
+ "?]?]!!!!87!!!?]?@", // reverse strand, native, aligned
+ "@?]?!!!78!!!!]?]?", // reverse strand, genomic, aligned + clipped
+ "?]?]!!!!87!!!?]?@" // reverse strand, native, aligned + clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 3S4=3D4=3S"); // soft clips plus deletion: the clipped rows drop the "vvv" and "xxx" ends
+ tests::CheckQualityTagsClippedAndAligned(
+ "3S4=3D4=3S", // CIGAR
+ "vvv?]?]?]?@xxx", // input
+ {
+ "vvv?]?]?]?@xxx", // forward strand, genomic
+ "vvv?]?]?]?@xxx", // forward strand, native
+ "vvv?]?]!!!?]?@xxx", // forward strand, genomic, aligned
+ "vvv?]?]!!!?]?@xxx", // forward strand, native, aligned
+ "?]?]!!!?]?@", // forward strand, genomic, aligned, clipped
+ "?]?]!!!?]?@", // forward strand, native, aligned, clipped
+ "xxx@?]?]?]?vvv", // reverse strand, genomic
+ "vvv?]?]?]?@xxx", // reverse strand, native
+ "xxx@?]?!!!]?]?vvv", // reverse strand, genomic, aligned
+ "vvv?]?]!!!?]?@xxx", // reverse strand, native, aligned
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned, clipped
+ "?]?]!!!?]?@" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H4=3D4=3H"); // hard clips plus deletion: same expectations as the unclipped 4=3D4= case
+ tests::CheckQualityTagsClippedAndAligned(
+ "2H4=3D4=3H", // CIGAR
+ "?]?]?]?@", // input
+ {
+ "?]?]?]?@", // forward strand, genomic
+ "?]?]?]?@", // forward strand, native
+ "?]?]!!!?]?@", // forward strand, genomic, aligned
+ "?]?]!!!?]?@", // forward strand, native, aligned
+ "?]?]!!!?]?@", // forward strand, genomic, aligned, clipped
+ "?]?]!!!?]?@", // forward strand, native, aligned, clipped
+ "@?]?]?]?", // reverse strand, genomic
+ "?]?]?]?@", // reverse strand, native
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned
+ "?]?]!!!?]?@", // reverse strand, native, aligned
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned, clipped
+ "?]?]!!!?]?@" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H"); // hard + soft clips: same expectations as the 3S4=3D4=3S case
+ tests::CheckQualityTagsClippedAndAligned(
+ "2H3S4=3D4=3S3H", // CIGAR
+ "vvv?]?]?]?@xxx", // input
+ {
+ "vvv?]?]?]?@xxx", // forward strand, genomic
+ "vvv?]?]?]?@xxx", // forward strand, native
+ "vvv?]?]!!!?]?@xxx", // forward strand, genomic, aligned
+ "vvv?]?]!!!?]?@xxx", // forward strand, native, aligned
+ "?]?]!!!?]?@", // forward strand, genomic, aligned, clipped
+ "?]?]!!!?]?@", // forward strand, native, aligned, clipped
+ "xxx@?]?]?]?vvv", // reverse strand, genomic
+ "vvv?]?]?]?@xxx", // reverse strand, native
+ "xxx@?]?!!!]?]?vvv", // reverse strand, genomic, aligned
+ "vvv?]?]!!!?]?@xxx", // reverse strand, native, aligned
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned, clipped
+ "?]?]!!!?]?@" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+}
+
+TEST(BamRecordTest, BaseTagsClippedAndAligned) // Base-tag expectations for CIGARs that combine deletions, insertions, padding and clips.
+{ // In the expected "aligned" rows, '-' stands at deleted positions and '*' at padded positions.
+ {
+ SCOPED_TRACE("CIGAR: 4=3D4="); // deletion: aligned rows carry one '-' per deleted base
+ tests::CheckBaseTagsClippedAndAligned(
+ "4=3D4=", // CIGAR
+ "AACCGTTA", // input
+ {
+ "AACCGTTA", // forward strand, genomic
+ "AACCGTTA", // forward strand, native
+ "AACC---GTTA", // forward strand, genomic, aligned
+ "AACC---GTTA", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned, clipped
+ "AACC---GTTA", // forward strand, native, aligned, clipped
+ "TAACGGTT", // reverse strand, genomic
+ "AACCGTTA", // reverse strand, native
+ "TAAC---GGTT", // reverse strand, genomic, aligned
+ "AACC---GTTA", // reverse strand, native, aligned
+ "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
+ "AACC---GTTA" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2I2D4="); // deletions around an insertion: the inserted bases "TA" are kept in every row
+ tests::CheckBaseTagsClippedAndAligned(
+ "4=1D2I2D4=", // CIGAR
+ "ATCCTAGGTT", // input
+ {
+ "ATCCTAGGTT", // forward strand, genomic
+ "ATCCTAGGTT", // forward strand, native
+ "ATCC-TA--GGTT", // forward strand, genomic, aligned
+ "ATCC-TA--GGTT", // forward strand, native, aligned
+ "ATCC-TA--GGTT", // forward strand, genomic, aligned, clipped
+ "ATCC-TA--GGTT", // forward strand, native, aligned, clipped
+ "AACCTAGGAT", // reverse strand, genomic
+ "ATCCTAGGTT", // reverse strand, native
+ "AACC-TA--GGAT", // reverse strand, genomic, aligned
+ "ATCC--TA-GGTT", // reverse strand, native, aligned
+ "AACC-TA--GGAT", // reverse strand, genomic, aligned, clipped
+ "ATCC--TA-GGTT" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4="); // padding (P): aligned rows carry one '*' per padded position
+ tests::CheckBaseTagsClippedAndAligned(
+ "4=1D2P2I2P2D4=", // CIGAR
+ "ATCCTAGGTT", // input
+ {
+ "ATCCTAGGTT", // forward strand, genomic
+ "ATCCTAGGTT", // forward strand, native
+ "ATCC-**TA**--GGTT", // forward strand, genomic, aligned
+ "ATCC-**TA**--GGTT", // forward strand, native, aligned
+ "ATCC-**TA**--GGTT", // forward strand, genomic, aligned, clipped
+ "ATCC-**TA**--GGTT", // forward strand, native, aligned, clipped
+ "AACCTAGGAT", // reverse strand, genomic
+ "ATCCTAGGTT", // reverse strand, native
+ "AACC-**TA**--GGAT", // reverse strand, genomic, aligned
+ "ATCC--**TA**-GGTT", // reverse strand, native, aligned
+ "AACC-**TA**--GGAT", // reverse strand, genomic, aligned, clipped
+ "ATCC--**TA**-GGTT" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 3S4=3D4=3S"); // soft clips plus deletion: the clipped rows drop 3 bases from each end
+ tests::CheckBaseTagsClippedAndAligned(
+ "3S4=3D4=3S", // CIGAR
+ "TTTAACCGTTACCG", // input
+ {
+ "TTTAACCGTTACCG", // forward strand, genomic
+ "TTTAACCGTTACCG", // forward strand, native
+ "TTTAACC---GTTACCG", // forward strand, genomic, aligned
+ "TTTAACC---GTTACCG", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned, clipped
+ "AACC---GTTA", // forward strand, native, aligned, clipped
+ "CGGTAACGGTTAAA", // reverse strand, genomic
+ "TTTAACCGTTACCG", // reverse strand, native
+ "CGGTAAC---GGTTAAA", // reverse strand, genomic, aligned
+ "TTTAACC---GTTACCG", // reverse strand, native, aligned
+ "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
+ "AACC---GTTA" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H4=3D4=3H"); // hard clips plus deletion: same expectations as the unclipped 4=3D4= case
+ tests::CheckBaseTagsClippedAndAligned(
+ "2H4=3D4=3H", // CIGAR
+ "AACCGTTA", // input
+ {
+ "AACCGTTA", // forward strand, genomic
+ "AACCGTTA", // forward strand, native
+ "AACC---GTTA", // forward strand, genomic, aligned
+ "AACC---GTTA", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned, clipped
+ "AACC---GTTA", // forward strand, native, aligned, clipped
+ "TAACGGTT", // reverse strand, genomic
+ "AACCGTTA", // reverse strand, native
+ "TAAC---GGTT", // reverse strand, genomic, aligned
+ "AACC---GTTA", // reverse strand, native, aligned
+ "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
+ "AACC---GTTA" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H"); // hard + soft clips: same expectations as the 3S4=3D4=3S case
+ tests::CheckBaseTagsClippedAndAligned(
+ "2H3S4=3D4=3S3H", // CIGAR
+ "TTTAACCGTTACCG", // input
+ {
+ "TTTAACCGTTACCG", // forward strand, genomic
+ "TTTAACCGTTACCG", // forward strand, native
+ "TTTAACC---GTTACCG", // forward strand, genomic, aligned
+ "TTTAACC---GTTACCG", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned, clipped
+ "AACC---GTTA", // forward strand, native, aligned, clipped
+ "CGGTAACGGTTAAA", // reverse strand, genomic
+ "TTTAACCGTTACCG", // reverse strand, native
+ "CGGTAAC---GGTTAAA", // reverse strand, genomic, aligned
+ "TTTAACC---GTTACCG", // reverse strand, native, aligned
+ "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
+ "AACC---GTTA" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+}
+
+TEST(BamRecordTest, FrameTagsClippedAndAligned)
+{
+ {
+ SCOPED_TRACE("CIGAR: 4=3D4=");
+ tests::CheckFrameTagsClippedAndAligned(
+ "4=3D4=", // CIGAR
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // input
+ {
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
+ tests::CheckFrameTagsClippedAndAligned(
+ "4=1D2I2D4=", // CIGAR
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // input
+ {
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
+ tests::CheckFrameTagsClippedAndAligned(
+ "4=1D2P2I2P2D4=", // CIGAR
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // input
+ {
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
+ tests::CheckFrameTagsClippedAndAligned(
+ "3S4=3D4=3S", // CIGAR
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // input
+ {
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native
+ { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
+ tests::CheckFrameTagsClippedAndAligned(
+ "2H4=3D4=3H", // CIGAR
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // input
+ {
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
+ tests::CheckFrameTagsClippedAndAligned(
+ "2H3S4=3D4=3S3H", // CIGAR
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // input
+ {
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native
+ { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+}
+
+// Drives tests::CheckPulseBaseTags over six CIGAR layouts: deletion only,
+// deletion + insertion, padding, soft clips, hard clips, and hard + soft clips.
+//
+// Each case passes the CIGAR, the basecalls (seqBases), the pulse calls and the
+// raw tag data, followed by two 12-entry expectation lists ("all pulses" and
+// "basecalls only"). Every entry is labelled with the strand / orientation /
+// aligned / clipped combination it is meant for.
+//
+// Properties of the fixtures that can be read off the literals below:
+//  - seqBases is the upper-case subset of pulseCalls (lower-case letters
+//    presumably denote pulses without a basecall -- confirm against the helper).
+//  - Reverse-strand genomic expectations are the reverse complement of the
+//    native string, with letter case preserved.
+//  - Every aligned "all pulses" expectation is the empty string.
+//  - In the aligned "basecalls only" expectations '-' fills deletion (D)
+//    columns and '*' fills padding (P) columns.
+//  - Soft-clipped (S) bases are dropped only from the "clipped" expectations;
+//    the hard-clip (H) cases expect the same strings as their unclipped
+//    counterparts.
+TEST(BamRecordTest, PulseBaseTags)
+{
+ {
+ SCOPED_TRACE("CIGAR: 4=3D4=");
+ tests::CheckPulseBaseTags(
+ "4=3D4=", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ "AAaaCCGggTTA", // tag data
+
+ { // all pulses
+
+ "AAaaCCGggTTA", // forward strand, genomic
+ "AAaaCCGggTTA", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "TAAccCGGttTT", // reverse strand, genomic
+ "AAaaCCGggTTA", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "AACCGTTA", // forward strand, genomic
+ "AACCGTTA", // forward strand, native
+ "AACC---GTTA", // forward strand, genomic, aligned
+ "AACC---GTTA", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned, clipped
+ "AACC---GTTA", // forward strand, native, aligned, clipped
+ "TAACGGTT", // reverse strand, genomic
+ "AACCGTTA", // reverse strand, native
+ "TAAC---GGTT", // reverse strand, genomic, aligned
+ "AACC---GTTA", // reverse strand, native, aligned
+ "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
+ "AACC---GTTA" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
+ tests::CheckPulseBaseTags(
+ "4=1D2I2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ "ATttCCTtAGGggTT", // tag data
+
+ { // all pulses
+
+ "ATttCCTtAGGggTT", // forward strand, genomic
+ "ATttCCTtAGGggTT", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "AAccCCTaAGGaaAT", // reverse strand, genomic
+ "ATttCCTtAGGggTT", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "ATCCTAGGTT", // forward strand, genomic
+ "ATCCTAGGTT", // forward strand, native
+ "ATCC-TA--GGTT", // forward strand, genomic, aligned
+ "ATCC-TA--GGTT", // forward strand, native, aligned
+ "ATCC-TA--GGTT", // forward strand, genomic, aligned, clipped
+ "ATCC-TA--GGTT", // forward strand, native, aligned, clipped
+ "AACCTAGGAT", // reverse strand, genomic
+ "ATCCTAGGTT", // reverse strand, native
+ "AACC-TA--GGAT", // reverse strand, genomic, aligned
+ "ATCC--TA-GGTT", // reverse strand, native, aligned
+ "AACC-TA--GGAT", // reverse strand, genomic, aligned, clipped
+ "ATCC--TA-GGTT" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
+ tests::CheckPulseBaseTags(
+ "4=1D2P2I2P2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ "ATttCCTtAGGggTT", // tag data
+ { // all pulses
+ "ATttCCTtAGGggTT", // forward strand, genomic
+ "ATttCCTtAGGggTT", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "AAccCCTaAGGaaAT", // reverse strand, genomic
+ "ATttCCTtAGGggTT", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only ('*' marks the padding columns of this CIGAR)
+ "ATCCTAGGTT", // forward strand, genomic
+ "ATCCTAGGTT", // forward strand, native
+ "ATCC-**TA**--GGTT", // forward strand, genomic, aligned
+ "ATCC-**TA**--GGTT", // forward strand, native, aligned
+ "ATCC-**TA**--GGTT", // forward strand, genomic, aligned, clipped
+ "ATCC-**TA**--GGTT", // forward strand, native, aligned, clipped
+ "AACCTAGGAT", // reverse strand, genomic
+ "ATCCTAGGTT", // reverse strand, native
+ "AACC-**TA**--GGAT", // reverse strand, genomic, aligned
+ "ATCC--**TA**-GGTT", // reverse strand, native, aligned
+ "AACC-**TA**--GGAT", // reverse strand, genomic, aligned, clipped
+ "ATCC--**TA**-GGTT" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
+ tests::CheckPulseBaseTags(
+ "3S4=3D4=3S", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ "TTTttAACCccGTTAaaCCG", // tag data
+
+ { // all pulses
+
+ "TTTttAACCccGTTAaaCCG", // forward strand, genomic
+ "TTTttAACCccGTTAaaCCG", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "CGGttTAACggGGTTaaAAA", // reverse strand, genomic
+ "TTTttAACCccGTTAaaCCG", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "TTTAACCGTTACCG", // forward strand, genomic
+ "TTTAACCGTTACCG", // forward strand, native
+ "TTTAACC---GTTACCG", // forward strand, genomic, aligned
+ "TTTAACC---GTTACCG", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned, clipped
+ "AACC---GTTA", // forward strand, native, aligned, clipped
+ "CGGTAACGGTTAAA", // reverse strand, genomic
+ "TTTAACCGTTACCG", // reverse strand, native
+ "CGGTAAC---GGTTAAA", // reverse strand, genomic, aligned
+ "TTTAACC---GTTACCG", // reverse strand, native, aligned
+ "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
+ "AACC---GTTA" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
+ tests::CheckPulseBaseTags(
+ "2H4=3D4=3H", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ "AAaaCCGggTTA", // tag data
+
+ { // all pulses
+
+ "AAaaCCGggTTA", // forward strand, genomic
+ "AAaaCCGggTTA", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "TAAccCGGttTT", // reverse strand, genomic
+ "AAaaCCGggTTA", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "AACCGTTA", // forward strand, genomic
+ "AACCGTTA", // forward strand, native
+ "AACC---GTTA", // forward strand, genomic, aligned
+ "AACC---GTTA", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned, clipped
+ "AACC---GTTA", // forward strand, native, aligned, clipped
+ "TAACGGTT", // reverse strand, genomic
+ "AACCGTTA", // reverse strand, native
+ "TAAC---GGTT", // reverse strand, genomic, aligned
+ "AACC---GTTA", // reverse strand, native, aligned
+ "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
+ "AACC---GTTA" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
+ tests::CheckPulseBaseTags(
+ "2H3S4=3D4=3S3H", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ "TTTttAACCccGTTAaaCCG", // tag data
+
+ { // all pulses
+
+ "TTTttAACCccGTTAaaCCG", // forward strand, genomic
+ "TTTttAACCccGTTAaaCCG", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "CGGttTAACggGGTTaaAAA", // reverse strand, genomic
+ "TTTttAACCccGTTAaaCCG", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "TTTAACCGTTACCG", // forward strand, genomic
+ "TTTAACCGTTACCG", // forward strand, native
+ "TTTAACC---GTTACCG", // forward strand, genomic, aligned
+ "TTTAACC---GTTACCG", // forward strand, native, aligned
+ "AACC---GTTA", // forward strand, genomic, aligned, clipped
+ "AACC---GTTA", // forward strand, native, aligned, clipped
+ "CGGTAACGGTTAAA", // reverse strand, genomic
+ "TTTAACCGTTACCG", // reverse strand, native
+ "CGGTAAC---GGTTAAA", // reverse strand, genomic, aligned
+ "TTTAACC---GTTACCG", // reverse strand, native, aligned
+ "TAAC---GGTT", // reverse strand, genomic, aligned, clipped
+ "AACC---GTTA" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+}
+
+// Drives tests::CheckPulseQualityTags over the same six CIGAR layouts used by
+// the pulse base-tag test: deletion only, deletion + insertion, padding, soft
+// clips, hard clips, and hard + soft clips.
+//
+// Each case passes the CIGAR, the basecalls (seqBases), the pulse calls and the
+// per-pulse quality string (tag data), followed by two 12-entry expectation
+// lists ("all pulses" and "basecalls only"). Every entry is labelled with the
+// strand / orientation / aligned / clipped combination it is meant for.
+//
+// Properties of the fixtures that can be read off the literals below:
+//  - The tag data holds '!' at exactly the positions where pulseCalls is
+//    lower-case; the "basecalls only" strings keep only the remaining entries.
+//  - Reverse-strand genomic expectations are the plain reversal of the native
+//    string.
+//  - Every aligned "all pulses" expectation is the empty string.
+//  - In the aligned "basecalls only" expectations '!' fills both deletion (D)
+//    and padding (P) columns.
+//  - Soft-clipped (S) entries ('v' / 'x' here) are dropped only from the
+//    "clipped" expectations; the hard-clip (H) cases expect the same strings as
+//    their unclipped counterparts.
+TEST(BamRecordTest, PulseQualityTags)
+{
+ {
+ SCOPED_TRACE("CIGAR: 4=3D4=");
+ tests::CheckPulseQualityTags(
+ "4=3D4=", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ "?]!!?]?!!]?@", // tag data
+
+ { // all pulses
+
+ "?]!!?]?!!]?@", // forward strand, genomic
+ "?]!!?]?!!]?@", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "@?]!!?]?!!]?", // reverse strand, genomic
+ "?]!!?]?!!]?@", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "?]?]?]?@", // forward strand, genomic
+ "?]?]?]?@", // forward strand, native
+ "?]?]!!!?]?@", // forward strand, genomic, aligned
+ "?]?]!!!?]?@", // forward strand, native, aligned
+ "?]?]!!!?]?@", // forward strand, genomic, aligned, clipped
+ "?]?]!!!?]?@", // forward strand, native, aligned, clipped
+ "@?]?]?]?", // reverse strand, genomic
+ "?]?]?]?@", // reverse strand, native
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned
+ "?]?]!!!?]?@", // reverse strand, native, aligned
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned, clipped
+ "?]?]!!!?]?@" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
+ tests::CheckPulseQualityTags(
+ "4=1D2I2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ "?]!!?]8!7?]!!?@", // tag data
+
+ { // all pulses
+
+ "?]!!?]8!7?]!!?@", // forward strand, genomic
+ "?]!!?]8!7?]!!?@", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "@?!!]?7!8]?!!]?", // reverse strand, genomic
+ "?]!!?]8!7?]!!?@", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "?]?]87?]?@", // forward strand, genomic
+ "?]?]87?]?@", // forward strand, native
+ "?]?]!87!!?]?@", // forward strand, genomic, aligned
+ "?]?]!87!!?]?@", // forward strand, native, aligned
+ "?]?]!87!!?]?@", // forward strand, genomic, aligned, clipped
+ "?]?]!87!!?]?@", // forward strand, native, aligned, clipped
+ "@?]?78]?]?", // reverse strand, genomic
+ "?]?]87?]?@", // reverse strand, native
+ "@?]?!78!!]?]?", // reverse strand, genomic, aligned
+ "?]?]!!87!?]?@", // reverse strand, native, aligned
+ "@?]?!78!!]?]?", // reverse strand, genomic, aligned, clipped
+ "?]?]!!87!?]?@" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
+ tests::CheckPulseQualityTags(
+ "4=1D2P2I2P2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ "?]!!?]8!7?]!!?@", // tag data
+ { // all pulses
+ "?]!!?]8!7?]!!?@", // forward strand, genomic
+ "?]!!?]8!7?]!!?@", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "@?!!]?7!8]?!!]?", // reverse strand, genomic
+ "?]!!?]8!7?]!!?@", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only ('!' fills the deletion and padding columns of this CIGAR)
+ "?]?]87?]?@", // forward strand, genomic
+ "?]?]87?]?@", // forward strand, native
+ "?]?]!!!87!!!!?]?@", // forward strand, genomic, aligned
+ "?]?]!!!87!!!!?]?@", // forward strand, native, aligned
+ "?]?]!!!87!!!!?]?@", // forward strand, genomic, aligned, clipped
+ "?]?]!!!87!!!!?]?@", // forward strand, native, aligned, clipped
+ "@?]?78]?]?", // reverse strand, genomic
+ "?]?]87?]?@", // reverse strand, native
+ "@?]?!!!78!!!!]?]?", // reverse strand, genomic, aligned
+ "?]?]!!!!87!!!?]?@", // reverse strand, native, aligned
+ "@?]?!!!78!!!!]?]?", // reverse strand, genomic, aligned, clipped
+ "?]?]!!!!87!!!?]?@" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
+ tests::CheckPulseQualityTags(
+ "3S4=3D4=3S", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ "vvv!!?]?]!!?]?@!!xxx", // tag data
+
+ { // all pulses
+
+ "vvv!!?]?]!!?]?@!!xxx", // forward strand, genomic
+ "vvv!!?]?]!!?]?@!!xxx", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "xxx!!@?]?!!]?]?!!vvv", // reverse strand, genomic
+ "vvv!!?]?]!!?]?@!!xxx", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "vvv?]?]?]?@xxx", // forward strand, genomic
+ "vvv?]?]?]?@xxx", // forward strand, native
+ "vvv?]?]!!!?]?@xxx", // forward strand, genomic, aligned
+ "vvv?]?]!!!?]?@xxx", // forward strand, native, aligned
+ "?]?]!!!?]?@", // forward strand, genomic, aligned, clipped
+ "?]?]!!!?]?@", // forward strand, native, aligned, clipped
+ "xxx@?]?]?]?vvv", // reverse strand, genomic
+ "vvv?]?]?]?@xxx", // reverse strand, native
+ "xxx@?]?!!!]?]?vvv", // reverse strand, genomic, aligned
+ "vvv?]?]!!!?]?@xxx", // reverse strand, native, aligned
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned, clipped
+ "?]?]!!!?]?@" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
+ tests::CheckPulseQualityTags(
+ "2H4=3D4=3H", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ "?]!!?]?!!]?@", // tag data
+
+ { // all pulses
+
+ "?]!!?]?!!]?@", // forward strand, genomic
+ "?]!!?]?!!]?@", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "@?]!!?]?!!]?", // reverse strand, genomic
+ "?]!!?]?!!]?@", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "?]?]?]?@", // forward strand, genomic
+ "?]?]?]?@", // forward strand, native
+ "?]?]!!!?]?@", // forward strand, genomic, aligned
+ "?]?]!!!?]?@", // forward strand, native, aligned
+ "?]?]!!!?]?@", // forward strand, genomic, aligned, clipped
+ "?]?]!!!?]?@", // forward strand, native, aligned, clipped
+ "@?]?]?]?", // reverse strand, genomic
+ "?]?]?]?@", // reverse strand, native
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned
+ "?]?]!!!?]?@", // reverse strand, native, aligned
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned, clipped
+ "?]?]!!!?]?@" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
+ tests::CheckPulseQualityTags(
+ "2H3S4=3D4=3S3H", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ "vvv!!?]?]!!?]?@!!xxx", // tag data
+
+ { // all pulses
+
+ "vvv!!?]?]!!?]?@!!xxx", // forward strand, genomic
+ "vvv!!?]?]!!?]?@!!xxx", // forward strand, native
+ "", // forward strand, genomic, aligned
+ "", // forward strand, native, aligned
+ "", // forward strand, genomic, aligned, clipped
+ "", // forward strand, native, aligned, clipped
+ "xxx!!@?]?!!]?]?!!vvv", // reverse strand, genomic
+ "vvv!!?]?]!!?]?@!!xxx", // reverse strand, native
+ "", // reverse strand, genomic, aligned
+ "", // reverse strand, native, aligned
+ "", // reverse strand, genomic, aligned, clipped
+ "" // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ "vvv?]?]?]?@xxx", // forward strand, genomic
+ "vvv?]?]?]?@xxx", // forward strand, native
+ "vvv?]?]!!!?]?@xxx", // forward strand, genomic, aligned
+ "vvv?]?]!!!?]?@xxx", // forward strand, native, aligned
+ "?]?]!!!?]?@", // forward strand, genomic, aligned, clipped
+ "?]?]!!!?]?@", // forward strand, native, aligned, clipped
+ "xxx@?]?]?]?vvv", // reverse strand, genomic
+ "vvv?]?]?]?@xxx", // reverse strand, native
+ "xxx@?]?!!!]?]?vvv", // reverse strand, genomic, aligned
+ "vvv?]?]!!!?]?@xxx", // reverse strand, native, aligned
+ "@?]?!!!]?]?", // reverse strand, genomic, aligned, clipped
+ "?]?]!!!?]?@" // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+}
+
+TEST(BamRecordTest, PulseFrameTags)
+{
+ {
+ SCOPED_TRACE("CIGAR: 4=3D4=");
+ tests::CheckPulseFrameTags(
+ "4=3D4=", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
+ tests::CheckPulseFrameTags(
+ "4=1D2I2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
+ tests::CheckPulseFrameTags(
+ "4=1D2P2I2P2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
+ tests::CheckPulseFrameTags(
+ "3S4=3D4=3S", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // tag data
+
+ { // all pulses
+
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 0, 0, 30, 10, 20, 10, 0, 0, 20, 10, 20, 10, 0, 0, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native
+ { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
+ tests::CheckPulseFrameTags(
+ "2H4=3D4=3H", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
+ tests::CheckPulseFrameTags(
+ "2H3S4=3D4=3S3H", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // tag data
+
+ { // all pulses
+
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 0, 0, 30, 10, 20, 10, 0, 0, 20, 10, 20, 10, 0, 0, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native
+ { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+}
+
+// Drives tests::CheckPulseUIntTags over six CIGAR scenarios: match/deletion,
+// insertion, padding, soft clips, hard clips, and hard + soft clips.
+//
+// Each call passes, in order: the CIGAR string, the basecalled sequence
+// (seqBases), the pulse-call string, the raw per-pulse tag values, and two
+// tables of expected values ("all pulses" and "basecalls only"). Every table
+// lists twelve rows, one per strand / orientation / aligned / clipped
+// combination, as labelled on each row.
+//
+// Notes on the fixture data:
+//  - lowercase letters in pulseCalls line up with the 0-valued tag entries,
+//    and those entries do not appear in the "basecalls only" rows
+//  - in the "basecalls only" aligned rows, a 0 appears at each deleted (D) or
+//    padded (P) CIGAR position
+//  - the aligned rows of the "all pulses" tables are empty
+//    NOTE(review): presumably aligned views are not produced when all pulses
+//    are requested - confirm against tests::CheckPulseUIntTags
+TEST(BamRecordTest, PulseUIntTags)
+{
+ {
+ SCOPED_TRACE("CIGAR: 4=3D4=");
+ tests::CheckPulseUIntTags(
+ "4=3D4=", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0,0, 10, 20, 10, 0,0, 20, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2I2D4=");
+ tests::CheckPulseUIntTags(
+ "4=1D2I2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 80, 70, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 70, 80, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 80, 70, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 4=1D2P2I2P2D4=");
+ tests::CheckPulseUIntTags(
+ "4=1D2P2I2P2D4=", // CIGAR
+ "ATCCTAGGTT", // seqBases
+ "ATttCCTtAGGggTT", // pulseCalls
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 0, 0, 20, 10, 70, 0, 80, 20, 10, 0, 0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0, 0, 10, 20, 80, 0, 70, 10, 20, 0, 0, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 80, 70, 0, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 70, 80, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 80, 70, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 70, 80, 0, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 0, 80, 70, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 3S4=3D4=3S");
+ tests::CheckPulseUIntTags(
+ "3S4=3D4=3S", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // tag data
+
+ { // all pulses
+
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 0, 0, 30, 10, 20, 10, 0, 0, 20, 10, 20, 10, 0, 0, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native
+ { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H4=3D4=3H");
+ tests::CheckPulseUIntTags(
+ "2H4=3D4=3H", // CIGAR
+ "AACCGTTA", // seqBases
+ "AAaaCCGggTTA", // pulseCalls
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // tag data
+
+ { // all pulses
+
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 0, 0, 10, 20, 10, 0, 0, 20, 10, 30 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // forward strand, native
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 30, 10, 20, 10, 20, 10, 20, 10 }, // reverse strand, genomic
+ { 10, 20, 10, 20, 10, 20, 10, 30 }, // reverse strand, native
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+ {
+ SCOPED_TRACE("CIGAR: 2H3S4=3D4=3S3H");
+ tests::CheckPulseUIntTags(
+ "2H3S4=3D4=3S3H", // CIGAR
+ "TTTAACCGTTACCG", // seqBases
+ "TTTttAACCccGTTAaaCCG", // pulseCalls
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // tag data
+
+ { // all pulses
+
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // forward strand, native
+ { }, // forward strand, genomic, aligned
+ { }, // forward strand, native, aligned
+ { }, // forward strand, genomic, aligned, clipped
+ { }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 0, 0, 30, 10, 20, 10, 0, 0, 20, 10, 20, 10, 0, 0, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 0, 0, 10, 20, 10, 20, 0, 0, 10, 20, 10, 30, 0, 0, 50, 50, 50 }, // reverse strand, native
+ { }, // reverse strand, genomic, aligned
+ { }, // reverse strand, native, aligned
+ { }, // reverse strand, genomic, aligned, clipped
+ { } // reverse strand, native, aligned, clipped
+ },
+ { // basecalls only
+
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // forward strand, native, aligned
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 }, // forward strand, native, aligned, clipped
+ { 50, 50, 50, 30, 10, 20, 10, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic
+ { 40, 40, 40, 10, 20, 10, 20, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native
+ { 50, 50, 50, 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10, 40, 40, 40 }, // reverse strand, genomic, aligned
+ { 40, 40, 40, 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30, 50, 50, 50 }, // reverse strand, native, aligned
+ { 30, 10, 20, 10, 0, 0, 0, 20, 10, 20, 10 }, // reverse strand, genomic, aligned, clipped
+ { 10, 20, 10, 20, 0, 0, 0, 10, 20, 10, 30 } // reverse strand, native, aligned, clipped
+ }
+ );
+ }
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/BamRecordBuilder.h>
+#include <pbbam/BamTagCodec.h>
+#include <chrono>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+namespace tests {
+
+// Cross-checks the length fields stored in a record's raw data against the
+// lengths reported by the public accessors of the same record.
+static
+void CheckRawData(const BamRecordImpl& bam)
+{
+    // Lengths as seen through the API-facing accessors.
+    // The stored name length is compared against one more than the visible name.
+    const uint32_t nameLen      = bam.Name().size() + 1;
+    const uint32_t cigarOpCount = bam.CigarData().size();
+    const int32_t  seqLen       = bam.Sequence().length();
+    const size_t   tagBytes     = BamTagCodec::Encode(bam.Tags()).size();
+
+    // Expected size of the variable-length data, section by section:
+    //
+    //   name     : l_qname
+    //   CIGAR    : n_cigar * 4
+    //   sequence : (l_qseq+1)/2
+    //   quals    : l_qseq
+    //   tags     : encoded tag bytes
+    const int totalLen = nameLen +
+                         (cigarOpCount * 4) +
+                         (seqLen+1)/2 +
+                         seqLen +
+                         tagBytes;
+
+    // raw record must exist, and each stored length must match
+    EXPECT_TRUE(static_cast<bool>(bam.d_));
+    EXPECT_EQ(nameLen,      bam.d_->core.l_qname);
+    EXPECT_EQ(cigarOpCount, bam.d_->core.n_cigar);
+    EXPECT_EQ(seqLen,       bam.d_->core.l_qseq);
+    EXPECT_EQ(totalLen,     bam.d_->l_data);
+}
+
+// Convenience overload: runs the same checks on the implementation object
+// held by a BamRecord.
+static
+void CheckRawData(const BamRecord& bam)
+{
+    CheckRawData(bam.impl_);
+}
+
+} // namespace tests
+
+TEST(BamRecordBuilderTest, DefaultValues)
+{
+    // Build a record from a builder on which no setter has been called.
+    BamRecordBuilder builder;
+    BamRecord bam = builder.Build();
+    auto& impl = bam.impl_;
+
+    // -------------------------------
+    // check raw data
+    // -------------------------------
+
+    const PBBAM_SHARED_PTR<bam1_t> raw = impl.d_;
+    ASSERT_TRUE(static_cast<bool>(raw));
+
+    // fixed-length (core) fields: all zero, except the name length
+    const auto& core = raw->core;
+    EXPECT_EQ(0, core.tid);
+    EXPECT_EQ(0, core.pos);
+    EXPECT_EQ(0, core.bin);
+    EXPECT_EQ(0, core.qual);
+    EXPECT_EQ(1, core.l_qname);    // initialized w/ NULL-term
+    EXPECT_EQ(0, core.flag);
+    EXPECT_EQ(0, core.n_cigar);
+    EXPECT_EQ(0, core.l_qseq);
+    EXPECT_EQ(0, core.mtid);
+    EXPECT_EQ(0, core.mpos);
+    EXPECT_EQ(0, core.isize);
+
+    // variable-length data: buffer is allocated, holds only the name terminator
+    EXPECT_TRUE(raw->data != nullptr);
+    EXPECT_EQ(1, raw->l_data);
+    EXPECT_EQ(static_cast<int>(0x800), raw->m_data);  // check this if we change or tune later
+
+    // -------------------------------
+    // check data via API calls
+    // -------------------------------
+
+    // numeric accessors
+    EXPECT_EQ(0, impl.Bin());
+    EXPECT_EQ(0, impl.Flag());
+    EXPECT_EQ(0, impl.InsertSize());
+    EXPECT_EQ(0, impl.MapQuality());
+    EXPECT_EQ(0, impl.MateReferenceId());
+    EXPECT_EQ(0, impl.MatePosition());
+    EXPECT_EQ(0, impl.Position());
+    EXPECT_EQ(0, impl.ReferenceId());
+    EXPECT_EQ(0, impl.Tags().size());
+
+    // flag-derived predicates
+    EXPECT_FALSE(impl.IsDuplicate());
+    EXPECT_FALSE(impl.IsFailedQC());
+    EXPECT_FALSE(impl.IsFirstMate());
+    EXPECT_TRUE(impl.IsMapped());
+    EXPECT_TRUE(impl.IsMateMapped());
+    EXPECT_FALSE(impl.IsMateReverseStrand());
+    EXPECT_FALSE(impl.IsPaired());
+    EXPECT_TRUE(impl.IsPrimaryAlignment());
+    EXPECT_FALSE(impl.IsProperPair());
+    EXPECT_FALSE(impl.IsReverseStrand());
+    EXPECT_FALSE(impl.IsSecondMate());
+    EXPECT_FALSE(impl.IsSupplementaryAlignment());
+
+    // string-like fields are all empty
+    const std::string empty;
+    EXPECT_EQ(empty, impl.Name());
+    EXPECT_EQ(empty, impl.CigarData().ToStdString());
+    EXPECT_EQ(empty, impl.Sequence());
+    EXPECT_EQ(empty, impl.Qualities().Fastq());
+
+    // raw lengths must agree with the API view
+    tests::CheckRawData(bam);
+}
+
+TEST(BamRecordBuilderTest, CheckSetters)
+{
+    // Tag fixture: a hex string, a uint8 array and a signed 32-bit integer.
+    // should be 28 bytes, encoded
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    // Set every numeric field to 42 and attach the tags.
+    BamRecordBuilder builder;
+    builder.Bin(42)
+           .Flag(42)
+           .InsertSize(42)
+           .MapQuality(42)
+           .MatePosition(42)
+           .MateReferenceId(42)
+           .Position(42)
+           .ReferenceId(42)
+           .Tags(tags);
+
+    BamRecord bam = builder.Build();
+    auto& impl = bam.impl_;
+
+    // -------------------------------
+    // check raw data
+    // -------------------------------
+
+    const PBBAM_SHARED_PTR<bam1_t> raw = impl.d_;
+    ASSERT_TRUE(static_cast<bool>(raw));
+
+    // fixed-length (core) fields: name, CIGAR and sequence were never set
+    const auto& core = raw->core;
+    EXPECT_EQ(42, core.tid);
+    EXPECT_EQ(42, core.pos);
+    EXPECT_EQ(42, core.bin);
+    EXPECT_EQ(42, core.qual);
+    EXPECT_EQ(1,  core.l_qname);   // initialized w/ NULL-term
+    EXPECT_EQ(42, core.flag);
+    EXPECT_EQ(0,  core.n_cigar);
+    EXPECT_EQ(0,  core.l_qseq);
+    EXPECT_EQ(42, core.mtid);
+    EXPECT_EQ(42, core.mpos);
+    EXPECT_EQ(42, core.isize);
+
+    // variable-length data
+    EXPECT_TRUE(raw->data != nullptr);
+    EXPECT_EQ(29, raw->l_data);                       // NULL-term qname + tags
+    EXPECT_EQ(static_cast<int>(0x800), raw->m_data);  // check this if we change or tune later
+
+    // -------------------------------
+    // check data via API calls
+    // -------------------------------
+
+    EXPECT_EQ(42, impl.Bin());
+    EXPECT_EQ(42, impl.Flag());
+    EXPECT_EQ(42, impl.InsertSize());
+    EXPECT_EQ(42, impl.MapQuality());
+    EXPECT_EQ(42, impl.MateReferenceId());
+    EXPECT_EQ(42, impl.MatePosition());
+    EXPECT_EQ(42, impl.Position());
+    EXPECT_EQ(42, impl.ReferenceId());
+
+    // tags read back from the record must match what was stored
+    const TagCollection& roundTripped = impl.Tags();
+
+    EXPECT_TRUE(roundTripped.at("HX").HasModifier(TagModifier::HEX_STRING));
+    EXPECT_EQ(std::string("1abc75"), roundTripped.at("HX").ToString());
+    EXPECT_EQ(static_cast<int32_t>(-42), roundTripped.at("XY").ToInt32());
+    EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), roundTripped.at("CA").ToUInt8Array());
+}
+
+//#define SEQ_LENGTH 7000
+//#define NUM_RECORDS 1000
+
+//const std::string& TEST_SEQUENCE = std::string(SEQ_LENGTH, 'G');
+//const std::string& TEST_QUALITIES = std::string(SEQ_LENGTH, '=');
+//const std::string& TEST_NAME = std::string(SEQ_LENGTH, '/');
+//const std::string& TEST_TAGDATA = std::string(SEQ_LENGTH, '2');
+
+//TEST(BamRecordBuilderTest, JustDoingSomeTimings_BamRecordBuilder)
+//{
+
+// BamRecordBuilder builder;
+
+// TagCollection tags;
+// tags["aa"] = TEST_TAGDATA;
+// tags["bb"] = TEST_TAGDATA;
+// tags["cc"] = TEST_TAGDATA;
+// tags["dd"] = TEST_TAGDATA;
+// tags["ee"] = TEST_TAGDATA;
+// tags["ff"] = TEST_TAGDATA;
+
+// auto start = std::chrono::steady_clock::now();
+
+// BamRecord record;
+// for (size_t i = 0; i < NUM_RECORDS; ++i) {
+// builder.Sequence(TEST_SEQUENCE)
+// .Qualities(TEST_QUALITIES)
+// .Name(TEST_NAME)
+// .Tags(tags)
+// .BuildInPlace(record);
+// }
+// auto end = std::chrono::steady_clock::now();
+// (void)record;
+// auto diff = end - start;
+// std::cout << std::chrono::duration <double, std::milli>(diff).count() << " ms" << std::endl;
+//}
+
+
+//TEST(BamRecordBuilderTest, JustDoingSomeTimings_BamRecordOnly)
+//{
+// TagCollection tags;
+// tags["aa"] = TEST_TAGDATA;
+// tags["bb"] = TEST_TAGDATA;
+// tags["cc"] = TEST_TAGDATA;
+// tags["dd"] = TEST_TAGDATA;
+// tags["ee"] = TEST_TAGDATA;
+// tags["ff"] = TEST_TAGDATA;
+
+// auto start = std::chrono::steady_clock::now();
+
+// BamRecord record;
+// for (size_t i = 0; i < NUM_RECORDS; ++i) {
+// record.SetSequenceAndQualities(TEST_SEQUENCE, TEST_QUALITIES);
+// record.Name(TEST_NAME);
+// record.Tags(tags);
+// }
+// auto end = std::chrono::steady_clock::now();
+// (void)record;
+// auto diff = end - start;
+// std::cout << std::chrono::duration <double, std::milli>(diff).count() << " ms" << std::endl;
+//}
+
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/BamRecord.h>
+#include <pbbam/BamRecordView.h>
+#include <pbbam/BamTagCodec.h>
+#include <chrono>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+typedef vector<uint16_t> f_data;
+
+namespace tests {
+
+// Adds the per-base and per-pulse tags that both record factories below
+// share. The same string/frame fixture is reused across several tags.
+static
+void FillSharedTags(TagCollection& tags,
+                    const string& tagBases,
+                    const string& tagQuals,
+                    const f_data& frames,
+                    const string& pulseCall,
+                    const string& pulseBases,
+                    const string& pulseQuals,
+                    const f_data& pulseFrames)
+{
+    // per-base tags
+    tags["dt"] = tagBases;      // deletionTag
+    tags["st"] = tagBases;      // substitutionTag
+    tags["dq"] = tagQuals;      // deletionQV
+    tags["iq"] = tagQuals;      // insertionQV
+    tags["mq"] = tagQuals;      // mergeQV
+    tags["sq"] = tagQuals;      // substitutionQV
+    tags["ip"] = frames;        // IPD
+    tags["pw"] = frames;        // pulseWidth
+
+    // per-pulse tags
+    tags["pc"] = pulseCall;     // pulseCall
+    tags["pt"] = pulseBases;    // altLabelTag
+    tags["pq"] = pulseQuals;    // labelQV
+    tags["pv"] = pulseQuals;    // altLabelQV
+    tags["pg"] = pulseQuals;    // pulseMergeQV
+    tags["pa"] = pulseFrames;   // pkmean
+    tags["pm"] = pulseFrames;   // pkmid
+}
+
+// Creates an unnamed record carrying the given sequence/qualities, the query
+// interval tags (qs/qe), and the shared per-base / per-pulse tags.
+// The pulse arguments default to empty values.
+static
+BamRecord MakeRecord(const Position qStart,
+                     const Position qEnd,
+                     const string& seq,
+                     const string& quals,
+                     const string& tagBases,
+                     const string& tagQuals,
+                     const f_data& frames,
+                     const string& pulseCall = "",
+                     const string& pulseBases = "",
+                     const string& pulseQuals = "",
+                     const f_data& pulseFrames = f_data())
+{
+    BamRecordImpl recordImpl;
+    recordImpl.SetSequenceAndQualities(seq, quals);
+
+    TagCollection recordTags;
+    recordTags["qs"] = qStart;  // qStart
+    recordTags["qe"] = qEnd;    // qEnd
+    FillSharedTags(recordTags, tagBases, tagQuals, frames,
+                   pulseCall, pulseBases, pulseQuals, pulseFrames);
+    recordImpl.Tags(recordTags);
+
+    return BamRecord(std::move(recordImpl));
+}
+
+// Creates a record named "movie/42/ccs" carrying the given sequence/qualities
+// and the shared per-base / per-pulse tags; no qs/qe tags are stored.
+// The pulse arguments default to empty values.
+static
+BamRecord MakeCCSRecord(const string& seq,
+                        const string& quals,
+                        const string& tagBases,
+                        const string& tagQuals,
+                        const f_data& frames,
+                        const string& pulseCall = "",
+                        const string& pulseBases = "",
+                        const string& pulseQuals = "",
+                        const f_data& pulseFrames = f_data())
+{
+    BamRecordImpl recordImpl;
+    recordImpl.Name("movie/42/ccs");
+    recordImpl.SetSequenceAndQualities(seq, quals);
+
+    TagCollection recordTags;
+    FillSharedTags(recordTags, tagBases, tagQuals, frames,
+                   pulseCall, pulseBases, pulseQuals, pulseFrames);
+    recordImpl.Tags(recordTags);
+
+    return BamRecord(std::move(recordImpl));
+}
+
+} // namespace tests
+
+TEST(BamRecordClippingTest, ClipToQuery_Basic)
+{
+ const Position qStart = 500;
+ const Position qEnd = 510;
+ const string seq = "AACCGTTAGC";
+ const string quals = "?]?]?]?]?*";
+ const string tagBases = "AACCGTTAGC";
+ const string tagQuals = "?]?]?]?]?*";
+ const f_data frames = { 10, 10, 20, 20, 30, 40, 40, 10, 30, 20 };
+
+ const string pulseCall = "ttAaAtaCCGggatTTAcatGCt";
+ const string pulseBases = pulseCall;
+ const string pulseQuals = "==?=]==?]?====]?]===?*=";
+ const f_data pulseFrames = { 0,0,10,0,10,0,0,20,20,30,0,0,0,0,40,40,10,0,0,0,30,20,0 };
+
+ const int32_t tId = 0;
+ const Position tPos = 100;
+ const uint8_t mapQual = 80;
+
+ const Position clipStart = 502;
+ const Position clipEnd = 509;
+
+ const string seq_clipped = "CCGTTAG";
+ const string quals_clipped = "?]?]?]?";
+ const string tagBases_clipped = "CCGTTAG";
+ const string tagQuals_clipped = "?]?]?]?";
+ const f_data frames_clipped = { 20, 20, 30, 40, 40, 10, 30 };
+
+ const string pulseCall_clipped = "CCGggatTTAcatG";
+ const string pulseQuals_clipped = "?]?====]?]===?";
+ const f_data pulseFrames_clipped = { 20,20,30,0,0,0,0,40,40,10,0,0,0,30 };
+
+ const string seq_rev = "GCTAACGGTT";
+ const string pulseCall_rev = "aGCatgTAAatccCGGtaTtTaa";
+ const string quals_rev = "*?]?]?]?]?";
+ const string tagQuals_rev = quals_rev;
+ const f_data frames_rev = { 20, 30, 10, 40, 40, 30, 20, 20, 10, 10 };
+
+ const string seq_rev_clipped = "CTAACGG";
+ const string quals_rev_clipped = "?]?]?]?";
+ const string tagBases_rev_clipped = seq_rev_clipped;
+ const string tagQuals_rev_clipped = quals_rev_clipped;
+ const f_data frames_rev_clipped = { 30, 10, 40, 40, 30, 20, 20 };
+
+ const string pulseCall_rev_clipped = "CatgTAAatccCGG";
+ const string pulseQuals_rev_clipped = "?===]?]====?]?";
+ const f_data pulseFrames_rev_clipped = { 30,0,0,0,10,40,40,0,0,0,0,30,20,20 };
+
+ const string s1_cigar = "10=";
+ const string s2_cigar = "5=3D5=";
+ const string s3_cigar = "4=1D2I2D4=";
+
+ const string s1_cigar_clipped = "7=";
+ const string s2_cigar_clipped = "3=3D4=";
+ const string s3_cigar_clipped = "2=1D2I2D3=";
+
+ const BamRecord prototype = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames,
+ pulseCall, pulseBases, pulseQuals, pulseFrames);
+
+ BamRecord s0 = prototype; // unmapped record
+ BamRecord s1 = prototype.Mapped(tId, tPos, Strand::FORWARD, s1_cigar, mapQual);
+ BamRecord s2 = prototype.Mapped(tId, tPos, Strand::FORWARD, s2_cigar, mapQual);
+ BamRecord s3 = prototype.Mapped(tId, tPos, Strand::FORWARD, s3_cigar, mapQual);
+ BamRecord s1_rev = prototype.Mapped(tId, tPos, Strand::REVERSE, s1_cigar, mapQual);
+ BamRecord s2_rev = prototype.Mapped(tId, tPos, Strand::REVERSE, s2_cigar, mapQual);
+ BamRecord s3_rev = prototype.Mapped(tId, tPos, Strand::REVERSE, s3_cigar, mapQual);
+
+ s0.Clip(ClipType::CLIP_TO_QUERY, clipStart, clipEnd);
+ s1.Clip(ClipType::CLIP_TO_QUERY, clipStart, clipEnd);
+ s2.Clip(ClipType::CLIP_TO_QUERY, clipStart, clipEnd);
+ s3.Clip(ClipType::CLIP_TO_QUERY, clipStart, clipEnd);
+ s1_rev.Clip(ClipType::CLIP_TO_QUERY, clipStart, clipEnd);
+ s2_rev.Clip(ClipType::CLIP_TO_QUERY, clipStart, clipEnd);
+ s3_rev.Clip(ClipType::CLIP_TO_QUERY, clipStart, clipEnd);
+
+ { // s0
+
+ EXPECT_FALSE(s0.IsMapped());
+ EXPECT_EQ(clipStart, s0.QueryStart());
+ EXPECT_EQ(clipEnd, s0.QueryEnd());
+ EXPECT_EQ(PacBio::BAM::UnmappedPosition, s0.AlignedStart());
+ EXPECT_EQ(PacBio::BAM::UnmappedPosition, s0.AlignedEnd());
+ EXPECT_EQ(PacBio::BAM::UnmappedPosition, s0.ReferenceStart());
+ EXPECT_EQ(PacBio::BAM::UnmappedPosition, s0.ReferenceEnd());
+
+ const BamRecordView view
+ {
+ s0,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(seq_clipped, view.Sequence());
+ EXPECT_EQ(quals_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(tagBases_clipped, view.DeletionTags());
+ EXPECT_EQ(tagQuals_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(pulseQuals_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(pulseQuals_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames_clipped, view.IPD().Data());
+ EXPECT_EQ(pulseCall_clipped, view.PulseCalls());
+ }
+
+ { // s1 - FORWARD
+
+ EXPECT_TRUE(s1.IsMapped());
+ EXPECT_EQ(Strand::FORWARD, s1.AlignedStrand());
+ EXPECT_EQ(clipStart, s1.QueryStart());
+ EXPECT_EQ(clipEnd, s1.QueryEnd());
+ EXPECT_EQ(clipStart, s1.AlignedStart()); // queryStart (no soft clips)
+ EXPECT_EQ(clipEnd, s1.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(102, s1.ReferenceStart()); // 100 + startOffset
+ EXPECT_EQ(109, s1.ReferenceEnd()); // RefStart + 7=
+
+ EXPECT_EQ(s1_cigar_clipped, s1.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s1,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(seq_clipped, view.Sequence());
+ EXPECT_EQ(quals_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(tagBases_clipped, view.DeletionTags());
+ EXPECT_EQ(tagQuals_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(pulseQuals_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(pulseQuals_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames_clipped, view.IPD().Data());
+ EXPECT_EQ(pulseCall_clipped, view.PulseCalls());
+ }
+
+ { // s1 - REVERSE
+
+ EXPECT_TRUE(s1_rev.IsMapped());
+ EXPECT_EQ(Strand::REVERSE, s1_rev.AlignedStrand());
+ EXPECT_EQ(clipStart, s1_rev.QueryStart());
+ EXPECT_EQ(clipEnd, s1_rev.QueryEnd());
+ EXPECT_EQ(clipStart, s1_rev.AlignedStart()); // queryStart (no soft clips)
+ EXPECT_EQ(clipEnd, s1_rev.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(102, s1_rev.ReferenceStart()); // 100 + startOffset
+ EXPECT_EQ(109, s1_rev.ReferenceEnd()); // RefStart + 7=
+
+ EXPECT_EQ(s1_cigar_clipped, s1_rev.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s1_rev,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(seq_rev_clipped, view.Sequence());
+ EXPECT_EQ(quals_rev_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(tagBases_rev_clipped, view.DeletionTags());
+ EXPECT_EQ(tagQuals_rev_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(pulseQuals_rev_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(pulseQuals_rev_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames_rev_clipped, view.IPD().Data());
+ EXPECT_EQ(pulseCall_rev_clipped, view.PulseCalls());
+ }
+
+ { // s2 - FORWARD
+
+ EXPECT_TRUE(s2.IsMapped());
+ EXPECT_EQ(Strand::FORWARD, s2.AlignedStrand());
+ EXPECT_EQ(clipStart, s2.QueryStart());
+ EXPECT_EQ(clipEnd, s2.QueryEnd());
+ EXPECT_EQ(clipStart, s2.AlignedStart()); // queryStart (no soft clips)
+ EXPECT_EQ(clipEnd, s2.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(102, s2.ReferenceStart()); // 100 + startOffset
+ EXPECT_EQ(112, s2.ReferenceEnd()); // RefStart + 7= + 3D
+
+ EXPECT_EQ(s2_cigar_clipped, s2.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s2,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(seq_clipped, view.Sequence());
+ EXPECT_EQ(quals_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(tagBases_clipped, view.DeletionTags());
+ EXPECT_EQ(tagQuals_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(pulseQuals_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(pulseQuals_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames_clipped, view.IPD().Data());
+ }
+
+ { // s2 - REVERSE
+
+ EXPECT_TRUE(s2_rev.IsMapped());
+ EXPECT_EQ(Strand::REVERSE, s2_rev.AlignedStrand());
+ EXPECT_EQ(clipStart, s2_rev.QueryStart());
+ EXPECT_EQ(clipEnd, s2_rev.QueryEnd());
+ EXPECT_EQ(clipStart, s2_rev.AlignedStart()); // queryStart (no soft clips)
+ EXPECT_EQ(clipEnd, s2_rev.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(102, s2_rev.ReferenceStart()); // 100 + startOffset
+ EXPECT_EQ(112, s2_rev.ReferenceEnd()); // RefStart + 7= + 3D
+
+ EXPECT_EQ(s2_cigar_clipped, s2_rev.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s2_rev,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(seq_rev_clipped, view.Sequence());
+ EXPECT_EQ(quals_rev_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(tagBases_rev_clipped, view.DeletionTags());
+ EXPECT_EQ(tagQuals_rev_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(pulseQuals_rev_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(pulseQuals_rev_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames_rev_clipped, view.IPD().Data());
+ EXPECT_EQ(pulseCall_rev_clipped, view.PulseCalls());
+ }
+
+ { // s3 - FORWARD
+
+ EXPECT_TRUE(s3.IsMapped());
+ EXPECT_EQ(Strand::FORWARD, s3.AlignedStrand());
+ EXPECT_EQ(clipStart, s3.QueryStart());
+ EXPECT_EQ(clipEnd, s3.QueryEnd());
+ EXPECT_EQ(clipStart, s3.AlignedStart()); // queryStart (no soft clips)
+ EXPECT_EQ(clipEnd, s3.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(102, s3.ReferenceStart()); // 100 + startOffset
+ EXPECT_EQ(110, s3.ReferenceEnd()); // RefStart + 5= + 3D
+
+ EXPECT_EQ(s3_cigar_clipped, s3.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s3,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(seq_clipped, view.Sequence());
+ EXPECT_EQ(quals_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(tagBases_clipped, view.DeletionTags());
+ EXPECT_EQ(tagQuals_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(pulseQuals_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(pulseQuals_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames_clipped, view.IPD().Data());
+ EXPECT_EQ(pulseCall_clipped, view.PulseCalls());
+ }
+
+ { // s3 - REVERSE
+
+ EXPECT_TRUE(s3_rev.IsMapped());
+ EXPECT_EQ(Strand::REVERSE, s3_rev.AlignedStrand());
+ EXPECT_EQ(clipStart, s3_rev.QueryStart());
+ EXPECT_EQ(clipEnd, s3_rev.QueryEnd());
+ EXPECT_EQ(clipStart, s3_rev.AlignedStart()); // queryStart (no soft clips)
+ EXPECT_EQ(clipEnd, s3_rev.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(102, s3_rev.ReferenceStart()); // 100 + startOffset
+ EXPECT_EQ(110, s3_rev.ReferenceEnd()); // RefStart + 5= + 3D
+
+ EXPECT_EQ(s3_cigar_clipped, s3_rev.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s3_rev,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(seq_rev_clipped, view.Sequence());
+ EXPECT_EQ(quals_rev_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(tagBases_rev_clipped, view.DeletionTags());
+ EXPECT_EQ(tagQuals_rev_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(pulseQuals_rev_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(pulseQuals_rev_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames_rev_clipped, view.IPD().Data());
+ EXPECT_EQ(pulseCall_rev_clipped, view.PulseCalls());
+ }
+}
+
+TEST(BamRecordClippingTest, ClipToQuery_WithSoftClips)
+{ // Clips six mapped, soft-clipped copies of one record (3 CIGARs x 2 strands) to the query interval clipStart..clipEnd, then checks positions, CIGAR, and per-base data.
+ const Position qStart = 500;
+ const Position qEnd = 515;
+ const string seq = "TTAACCGTTAGCAAA";
+ const string seq_rev = "TTTGCTAACGGTTAA"; // reverse complement of seq; NOTE(review): not referenced in this test
+ const string quals = "--?]?]?]?]?*+++";
+ const string tagBases = "TTAACCGTTAGCAAA";
+ const string tagQuals = "--?]?]?]?]?*+++";
+ const string tagQuals_rev = "+++*?]?]?]?]?--"; // tagQuals reversed; NOTE(review): not referenced in this test
+ const f_data frames = { 40, 40, 10, 10, 20, 20, 30, 40, 40, 10, 30, 20, 10, 10, 10 };
+ const f_data frames_rev = { 10, 10, 10, 20, 30, 10, 40, 40, 30, 20, 20, 10, 10, 40, 40 }; // frames reversed; NOTE(review): not referenced in this test
+
+ const int32_t tId = 0;
+ const Position tPos = 100;
+ const uint8_t mapQual = 80;
+
+ const Position clipStart = 502; // qStart + 2
+ const Position clipEnd = 509; // clipStart + 7 bases
+
+ const string s1_cigar = "2S10=3S"; // case 1: soft clips around a plain 10-base match
+ const string s1_cigar_clipped = "7="; // expected post-clip values for case 1 follow (forward strand first, then reverse)
+ const string s1_seq_clipped = "AACCGTT";
+ const string s1_quals_clipped = "?]?]?]?";
+ const string s1_tagBases_clipped = s1_seq_clipped;
+ const string s1_tagQuals_clipped = s1_quals_clipped;
+ const f_data s1_frames_clipped = { 10, 10, 20, 20, 30, 40, 40 };
+ const string s1_seq_rev_clipped = "AACGGTT"; // reverse complement of s1_seq_clipped
+ const string s1_quals_rev_clipped = "?]?]?]?";
+ const string s1_tagBases_rev_clipped = s1_seq_rev_clipped;
+ const string s1_tagQuals_rev_clipped = s1_quals_rev_clipped;
+ const f_data s1_frames_rev_clipped = { 40, 40, 30, 20, 20, 10, 10 }; // s1_frames_clipped reversed
+
+ const string s2_cigar = "2S5=3D5=3S"; // case 2: soft clips around matches split by a 3-base deletion
+ const string s2_cigar_clipped = "5=3D2=";
+ const string s2_seq_clipped = "AACCGTT"; // per-base expectations are identical to case 1; only the CIGAR differs
+ const string s2_quals_clipped = "?]?]?]?";
+ const string s2_tagBases_clipped = s2_seq_clipped;
+ const string s2_tagQuals_clipped = s2_quals_clipped;
+ const f_data s2_frames_clipped = { 10, 10, 20, 20, 30, 40, 40 };
+ const string s2_seq_rev_clipped = "AACGGTT";
+ const string s2_quals_rev_clipped = "?]?]?]?";
+ const string s2_tagBases_rev_clipped = s2_seq_rev_clipped;
+ const string s2_tagQuals_rev_clipped = s2_quals_rev_clipped;
+ const f_data s2_frames_rev_clipped = { 40, 40, 30, 20, 20, 10, 10 };
+
+ const string s3_cigar = "2S4=1D2I2D4=3S"; // case 3: soft clips around matches with deletions and a 2-base insertion
+ const string s3_cigar_clipped = "4=1D2I2D1=";
+ const string s3_seq_clipped = "AACCGTT"; // per-base expectations are identical to case 1; only the CIGAR differs
+ const string s3_quals_clipped = "?]?]?]?";
+ const string s3_tagBases_clipped = s3_seq_clipped;
+ const string s3_tagQuals_clipped = s3_quals_clipped;
+ const f_data s3_frames_clipped = { 10, 10, 20, 20, 30, 40, 40 };
+ const string s3_seq_rev_clipped = "AACGGTT";
+ const string s3_quals_rev_clipped = "?]?]?]?";
+ const string s3_tagBases_rev_clipped = s3_seq_rev_clipped;
+ const string s3_tagQuals_rev_clipped = s3_quals_rev_clipped;
+ const f_data s3_frames_rev_clipped = { 40, 40, 30, 20, 20, 10, 10 };
+
+ const BamRecord prototype = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames,
+ seq, tagBases, tagQuals, frames); // trailing four arguments reuse the base-level data (presumably the pulse-level inputs; confirm against tests::MakeRecord)
+ BamRecord s1 = prototype.Mapped(tId, tPos, Strand::FORWARD, s1_cigar, mapQual);
+ BamRecord s2 = prototype.Mapped(tId, tPos, Strand::FORWARD, s2_cigar, mapQual);
+ BamRecord s3 = prototype.Mapped(tId, tPos, Strand::FORWARD, s3_cigar, mapQual);
+ BamRecord s1_rev = prototype.Mapped(tId, tPos, Strand::REVERSE, s1_cigar, mapQual);
+ BamRecord s2_rev = prototype.Mapped(tId, tPos, Strand::REVERSE, s2_cigar, mapQual);
+ BamRecord s3_rev = prototype.Mapped(tId, tPos, Strand::REVERSE, s3_cigar, mapQual);
+
+ // sanity checks before clipping: every copy is mapped at tPos and spans the reference length implied by its CIGAR
+ EXPECT_TRUE(s1.IsMapped());
+ EXPECT_EQ(tPos, s1.ReferenceStart());
+ EXPECT_EQ(tPos + 10, s1.ReferenceEnd()); // 10=
+
+ EXPECT_TRUE(s1_rev.IsMapped());
+ EXPECT_EQ(tPos, s1_rev.ReferenceStart());
+ EXPECT_EQ(tPos + 10, s1_rev.ReferenceEnd()); // 10=
+
+ EXPECT_TRUE(s2.IsMapped());
+ EXPECT_EQ(tPos, s2.ReferenceStart());
+ EXPECT_EQ(tPos + 13, s2.ReferenceEnd()); // 5= + 3D + 5=
+
+ EXPECT_TRUE(s2_rev.IsMapped());
+ EXPECT_EQ(tPos, s2_rev.ReferenceStart());
+ EXPECT_EQ(tPos + 13, s2_rev.ReferenceEnd()); // 5= + 3D + 5=
+
+ EXPECT_TRUE(s3.IsMapped());
+ EXPECT_EQ(tPos, s3.ReferenceStart());
+ EXPECT_EQ(tPos + 11, s3.ReferenceEnd()); // 4= + 1D + 2D + 4=
+
+ EXPECT_TRUE(s3_rev.IsMapped());
+ EXPECT_EQ(tPos, s3_rev.ReferenceStart());
+ EXPECT_EQ(tPos + 11, s3_rev.ReferenceEnd()); // 4= + 1D + 2D + 4=
+
+ s1.Clip(ClipType::CLIP_TO_QUERY, clipStart, clipEnd); // every copy is clipped to the same query interval
+ s2.Clip(ClipType::CLIP_TO_QUERY, clipStart, clipEnd);
+ s3.Clip(ClipType::CLIP_TO_QUERY, clipStart, clipEnd);
+ s1_rev.Clip(ClipType::CLIP_TO_QUERY, clipStart, clipEnd);
+ s2_rev.Clip(ClipType::CLIP_TO_QUERY, clipStart, clipEnd);
+ s3_rev.Clip(ClipType::CLIP_TO_QUERY, clipStart, clipEnd);
+
+ { // s1 - FORWARD
+
+ EXPECT_TRUE(s1.IsMapped());
+ EXPECT_EQ(Strand::FORWARD, s1.AlignedStrand());
+ EXPECT_EQ(clipStart, s1.QueryStart());
+ EXPECT_EQ(clipEnd, s1.QueryEnd());
+ EXPECT_EQ(clipStart, s1.AlignedStart()); // queryStart (no soft clips left)
+ EXPECT_EQ(clipEnd, s1.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(tPos, s1.ReferenceStart()); // tPos (same as before the clip)
+ EXPECT_EQ(tPos + 7, s1.ReferenceEnd()); // RefStart + 7=
+
+ EXPECT_EQ(s1_cigar_clipped, s1.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s1,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s1_seq_clipped, view.Sequence());
+ EXPECT_EQ(s1_quals_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s1_tagBases_clipped, view.DeletionTags());
+ EXPECT_EQ(s1_tagQuals_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s1_frames_clipped, view.IPD().Data());
+ }
+
+ { // s1 - REVERSE
+
+ EXPECT_TRUE(s1_rev.IsMapped());
+ EXPECT_EQ(Strand::REVERSE, s1_rev.AlignedStrand());
+ EXPECT_EQ(clipStart, s1_rev.QueryStart());
+ EXPECT_EQ(clipEnd, s1_rev.QueryEnd());
+ EXPECT_EQ(clipStart, s1_rev.AlignedStart()); // queryStart (no soft clips left)
+ EXPECT_EQ(clipEnd, s1_rev.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(tPos, s1_rev.ReferenceStart()); // tPos (same as before the clip)
+ EXPECT_EQ(tPos + 7, s1_rev.ReferenceEnd()); // RefStart + 7=
+
+ EXPECT_EQ(s1_cigar_clipped, s1_rev.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s1_rev,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s1_seq_rev_clipped, view.Sequence());
+ EXPECT_EQ(s1_quals_rev_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s1_tagBases_rev_clipped, view.DeletionTags());
+ EXPECT_EQ(s1_tagQuals_rev_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s1_frames_rev_clipped, view.IPD().Data());
+ }
+
+ { // s2 - FORWARD
+
+ EXPECT_TRUE(s2.IsMapped());
+ EXPECT_EQ(Strand::FORWARD, s2.AlignedStrand());
+ EXPECT_EQ(clipStart, s2.QueryStart());
+ EXPECT_EQ(clipEnd, s2.QueryEnd());
+ EXPECT_EQ(clipStart, s2.AlignedStart()); // queryStart (no soft clips left)
+ EXPECT_EQ(clipEnd, s2.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(tPos, s2.ReferenceStart()); // tPos (same as before the clip)
+ EXPECT_EQ(tPos + 10, s2.ReferenceEnd()); // RefStart + 5=3D2=
+
+ EXPECT_EQ(s2_cigar_clipped, s2.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s2,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s2_seq_clipped, view.Sequence());
+ EXPECT_EQ(s2_quals_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s2_tagBases_clipped, view.DeletionTags());
+ EXPECT_EQ(s2_tagQuals_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s2_frames_clipped, view.IPD().Data());
+ }
+
+ { // s2 - REVERSE
+
+ EXPECT_TRUE(s2_rev.IsMapped());
+ EXPECT_EQ(Strand::REVERSE, s2_rev.AlignedStrand());
+ EXPECT_EQ(clipStart, s2_rev.QueryStart());
+ EXPECT_EQ(clipEnd, s2_rev.QueryEnd());
+ EXPECT_EQ(clipStart, s2_rev.AlignedStart()); // queryStart (no soft clips left)
+ EXPECT_EQ(clipEnd, s2_rev.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(tPos, s2_rev.ReferenceStart()); // tPos (same as before the clip)
+ EXPECT_EQ(tPos + 10, s2_rev.ReferenceEnd()); // RefStart + 5=3D2=
+
+ EXPECT_EQ(s2_cigar_clipped, s2_rev.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s2_rev,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s2_seq_rev_clipped, view.Sequence());
+ EXPECT_EQ(s2_quals_rev_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s2_tagBases_rev_clipped, view.DeletionTags());
+ EXPECT_EQ(s2_tagQuals_rev_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s2_tagQuals_rev_clipped, view.AltLabelQVs().Fastq()); // NOTE(review): only AltLabelQVs is checked here; the s1 and s2-forward cases check neither label QV, the s3 cases check both
+ EXPECT_EQ(s2_frames_rev_clipped, view.IPD().Data());
+ }
+
+ { // s3 - FORWARD
+
+ EXPECT_TRUE(s3.IsMapped());
+ EXPECT_EQ(Strand::FORWARD, s3.AlignedStrand());
+ EXPECT_EQ(clipStart, s3.QueryStart());
+ EXPECT_EQ(clipEnd, s3.QueryEnd());
+ EXPECT_EQ(clipStart, s3.AlignedStart()); // queryStart (no soft clips left)
+ EXPECT_EQ(clipEnd, s3.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(tPos, s3.ReferenceStart()); // tPos (same as before the clip)
+ EXPECT_EQ(tPos + 8, s3.ReferenceEnd()); // RefStart + 4=1D2D1=
+
+ EXPECT_EQ(s3_cigar_clipped, s3.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s3,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s3_seq_clipped, view.Sequence());
+ EXPECT_EQ(s3_quals_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s3_tagBases_clipped, view.DeletionTags());
+ EXPECT_EQ(s3_tagQuals_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s3_tagQuals_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(s3_tagQuals_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(s3_frames_clipped, view.IPD().Data());
+ }
+
+ { // s3 - REVERSE
+ EXPECT_TRUE(s3_rev.IsMapped());
+ EXPECT_EQ(Strand::REVERSE, s3_rev.AlignedStrand());
+ EXPECT_EQ(clipStart, s3_rev.QueryStart());
+ EXPECT_EQ(clipEnd, s3_rev.QueryEnd());
+ EXPECT_EQ(clipStart, s3_rev.AlignedStart()); // queryStart (no soft clips left)
+ EXPECT_EQ(clipEnd, s3_rev.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(tPos, s3_rev.ReferenceStart()); // tPos (same as before the clip)
+ EXPECT_EQ(tPos + 8, s3_rev.ReferenceEnd()); // RefStart + 4=1D2D1=
+
+ EXPECT_EQ(s3_cigar_clipped, s3_rev.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s3_rev,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s3_seq_rev_clipped, view.Sequence());
+ EXPECT_EQ(s3_quals_rev_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s3_tagBases_rev_clipped, view.DeletionTags());
+ EXPECT_EQ(s3_tagQuals_rev_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s3_tagQuals_rev_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(s3_tagQuals_rev_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(s3_frames_rev_clipped, view.IPD().Data());
+ }
+}
+
+TEST(BamRecordClippingTest, ClipToReference_Basic)
+{ // Clips one unmapped record and six mapped copies (3 CIGARs x 2 strands, no soft clips) to the reference interval clipStart..clipEnd; the unmapped record is expected to stay unchanged.
+ const Position qStart = 500;
+ const Position qEnd = 510;
+ const string seq = "AACCGTTAGC";
+ const string quals = "?]?]?]?]?*";
+ const string tagBases = "AACCGTTAGC";
+ const string tagQuals = "?]?]?]?]?*";
+ const string tagQuals_rev = "*?]?]?]?]?"; // tagQuals reversed; NOTE(review): not referenced in this test
+ const f_data frames = { 10, 10, 20, 20, 30, 40, 40, 10, 30, 20 };
+
+ const int32_t tId = 0;
+ const Position tPos = 100;
+ const uint8_t mapQual = 80;
+
+ const Position clipStart = 102; // tPos + 2
+ const Position clipEnd = 107; // clipStart + 5 reference positions
+
+ const string s1_cigar = "10="; // case 1: plain 10-base match
+ const string s1_cigar_clipped = "5="; // expected post-clip values for case 1 follow (forward strand first, then reverse)
+ const string s1_seq_clipped = "CCGTT";
+ const string s1_quals_clipped = "?]?]?";
+ const string s1_tagBases_clipped = s1_seq_clipped;
+ const string s1_tagQuals_clipped = s1_quals_clipped;
+ const f_data s1_frames_clipped = { 20, 20, 30, 40, 40 };
+ const string s1_seq_rev_clipped = "TAACG";
+ const string s1_quals_rev_clipped = "]?]?]";
+ const string s1_tagBases_rev_clipped = s1_seq_rev_clipped;
+ const string s1_tagQuals_rev_clipped = s1_quals_rev_clipped;
+ const f_data s1_frames_rev_clipped = { 10, 40, 40, 30, 20 };
+
+ const string s2_cigar = "5=3D5="; // case 2: matches split by a 3-base deletion
+ const string s2_cigar_clipped = "3=2D"; // the interval ends inside the 3D, so only 2 deleted positions remain
+ const string s2_seq_clipped = "CCG";
+ const string s2_quals_clipped = "?]?";
+ const string s2_tagBases_clipped = s2_seq_clipped;
+ const string s2_tagQuals_clipped = s2_quals_clipped;
+ const f_data s2_frames_clipped = { 20, 20, 30 };
+ const string s2_seq_rev_clipped = "TAA";
+ const string s2_quals_rev_clipped = "]?]";
+ const string s2_tagBases_rev_clipped = s2_seq_rev_clipped;
+ const string s2_tagQuals_rev_clipped = s2_quals_rev_clipped;
+ const f_data s2_frames_rev_clipped = { 10, 40, 40 };
+
+ const string s3_cigar = "4=1D2I2D4="; // case 3: matches with deletions and a 2-base insertion
+ const string s3_cigar_clipped = "2=1D2I2D"; // the interval ends right after the 2D; the trailing 4= falls outside it
+ const string s3_seq_clipped = "CCGT";
+ const string s3_quals_clipped = "?]?]";
+ const string s3_tagBases_clipped = s3_seq_clipped;
+ const string s3_tagQuals_clipped = s3_quals_clipped;
+ const f_data s3_frames_clipped = { 20, 20, 30, 40 };
+ const string s3_seq_rev_clipped = "TAAC";
+ const string s3_quals_rev_clipped = "]?]?";
+ const string s3_tagBases_rev_clipped = s3_seq_rev_clipped;
+ const string s3_tagQuals_rev_clipped = s3_quals_rev_clipped;
+ const f_data s3_frames_rev_clipped = { 10, 40, 40, 30};
+
+ const BamRecord prototype = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames,
+ seq, tagBases, tagQuals, frames); // trailing four arguments reuse the base-level data (presumably the pulse-level inputs; confirm against tests::MakeRecord)
+ BamRecord s0 = prototype; // unmapped copy
+ BamRecord s1 = prototype.Mapped(tId, tPos, Strand::FORWARD, s1_cigar, mapQual);
+ BamRecord s2 = prototype.Mapped(tId, tPos, Strand::FORWARD, s2_cigar, mapQual);
+ BamRecord s3 = prototype.Mapped(tId, tPos, Strand::FORWARD, s3_cigar, mapQual);
+ BamRecord s1_rev = prototype.Mapped(tId, tPos, Strand::REVERSE, s1_cigar, mapQual);
+ BamRecord s2_rev = prototype.Mapped(tId, tPos, Strand::REVERSE, s2_cigar, mapQual);
+ BamRecord s3_rev = prototype.Mapped(tId, tPos, Strand::REVERSE, s3_cigar, mapQual);
+
+ s0.Clip(ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd); // every copy, including the unmapped one, gets the same clip request
+ s1.Clip(ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+ s2.Clip(ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+ s3.Clip(ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+ s1_rev.Clip(ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+ s2_rev.Clip(ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+ s3_rev.Clip(ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+
+ { // s0 - no clipping should have been done to unmapped record
+
+ EXPECT_FALSE(s0.IsMapped());
+ EXPECT_EQ(prototype.QueryStart(), s0.QueryStart());
+ EXPECT_EQ(prototype.QueryEnd(), s0.QueryEnd());
+ EXPECT_EQ(prototype.AlignedStart(), s0.AlignedStart());
+ EXPECT_EQ(prototype.AlignedEnd(), s0.AlignedEnd());
+ EXPECT_EQ(prototype.ReferenceStart(), s0.ReferenceStart());
+ EXPECT_EQ(prototype.ReferenceEnd(), s0.ReferenceEnd());
+
+ const BamRecordView protoView
+ {
+ prototype,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ const BamRecordView view
+ {
+ s0,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(protoView.Sequence(), view.Sequence()); // compare against the untouched prototype rather than hard-coded literals
+ EXPECT_EQ(protoView.Qualities(), view.Qualities());
+ EXPECT_EQ(protoView.DeletionTags(), view.DeletionTags());
+ EXPECT_EQ(protoView.DeletionQVs(), view.DeletionQVs());
+ EXPECT_EQ(protoView.LabelQVs(), view.LabelQVs());
+ EXPECT_EQ(protoView.AltLabelQVs(), view.AltLabelQVs());
+ EXPECT_EQ(protoView.IPD(), view.IPD());
+ }
+
+ { // s1 - FORWARD
+
+ EXPECT_TRUE(s1.IsMapped());
+ EXPECT_EQ(Strand::FORWARD, s1.AlignedStrand());
+ EXPECT_EQ(502, s1.QueryStart());
+ EXPECT_EQ(507, s1.QueryEnd());
+ EXPECT_EQ(502, s1.AlignedStart()); // queryStart (no soft clips)
+ EXPECT_EQ(507, s1.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(clipStart, s1.ReferenceStart()); // clipStart
+ EXPECT_EQ(clipEnd, s1.ReferenceEnd()); // clipEnd
+
+ EXPECT_EQ(s1_cigar_clipped, s1.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s1,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s1_seq_clipped, view.Sequence());
+ EXPECT_EQ(s1_quals_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s1_tagBases_clipped, view.DeletionTags());
+ EXPECT_EQ(s1_tagQuals_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s1_tagQuals_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(s1_tagQuals_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(s1_frames_clipped, view.IPD().Data());
+ }
+
+ { // s1 - REVERSE
+
+ EXPECT_TRUE(s1_rev.IsMapped());
+ EXPECT_EQ(Strand::REVERSE, s1_rev.AlignedStrand());
+ EXPECT_EQ(503, s1_rev.QueryStart()); // differs from the forward case (502..507); presumably the query interval is mirrored for the reverse strand (confirm against BamRecord::Clip)
+ EXPECT_EQ(508, s1_rev.QueryEnd());
+ EXPECT_EQ(503, s1_rev.AlignedStart()); // queryStart (no soft clips)
+ EXPECT_EQ(508, s1_rev.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(clipStart, s1_rev.ReferenceStart()); // clipStart
+ EXPECT_EQ(clipEnd, s1_rev.ReferenceEnd()); // clipEnd
+
+ EXPECT_EQ(s1_cigar_clipped, s1_rev.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s1_rev,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s1_seq_rev_clipped, view.Sequence());
+ EXPECT_EQ(s1_quals_rev_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s1_tagBases_rev_clipped, view.DeletionTags());
+ EXPECT_EQ(s1_tagQuals_rev_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s1_tagQuals_rev_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(s1_tagQuals_rev_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(s1_frames_rev_clipped, view.IPD().Data());
+ }
+
+ { // s2 - FORWARD
+
+ EXPECT_TRUE(s2.IsMapped());
+ EXPECT_EQ(Strand::FORWARD, s2.AlignedStrand());
+ EXPECT_EQ(502, s2.QueryStart());
+ EXPECT_EQ(505, s2.QueryEnd()); // only 3 query bases remain; the rest of the reference interval is deletion
+ EXPECT_EQ(502, s2.AlignedStart()); // queryStart (no soft clips)
+ EXPECT_EQ(505, s2.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(clipStart, s2.ReferenceStart()); // clipStart
+ EXPECT_EQ(clipEnd, s2.ReferenceEnd()); // clipEnd
+
+ EXPECT_EQ(s2_cigar_clipped, s2.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s2,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s2_seq_clipped, view.Sequence());
+ EXPECT_EQ(s2_quals_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s2_tagBases_clipped, view.DeletionTags());
+ EXPECT_EQ(s2_tagQuals_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s2_tagQuals_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(s2_tagQuals_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(s2_frames_clipped, view.IPD().Data());
+ }
+
+ { // s2 - REVERSE
+
+ EXPECT_TRUE(s2_rev.IsMapped());
+ EXPECT_EQ(Strand::REVERSE, s2_rev.AlignedStrand());
+ EXPECT_EQ(505, s2_rev.QueryStart());
+ EXPECT_EQ(508, s2_rev.QueryEnd());
+ EXPECT_EQ(505, s2_rev.AlignedStart()); // queryStart (no soft clips)
+ EXPECT_EQ(508, s2_rev.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(clipStart, s2_rev.ReferenceStart()); // clipStart
+ EXPECT_EQ(clipEnd, s2_rev.ReferenceEnd()); // clipEnd
+
+ EXPECT_EQ(s2_cigar_clipped, s2_rev.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s2_rev,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s2_seq_rev_clipped, view.Sequence());
+ EXPECT_EQ(s2_quals_rev_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s2_tagBases_rev_clipped, view.DeletionTags());
+ EXPECT_EQ(s2_tagQuals_rev_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s2_tagQuals_rev_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(s2_tagQuals_rev_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(s2_frames_rev_clipped, view.IPD().Data());
+ }
+
+ { // s3 - FORWARD
+
+ EXPECT_TRUE(s3.IsMapped());
+ EXPECT_EQ(Strand::FORWARD, s3.AlignedStrand());
+ EXPECT_EQ(502, s3.QueryStart());
+ EXPECT_EQ(506, s3.QueryEnd()); // 4 query bases remain: 2= plus 2I
+ EXPECT_EQ(502, s3.AlignedStart()); // queryStart (no soft clips)
+ EXPECT_EQ(506, s3.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(clipStart, s3.ReferenceStart()); // clipStart
+ EXPECT_EQ(clipEnd, s3.ReferenceEnd()); // clipEnd
+
+ EXPECT_EQ(s3_cigar_clipped, s3.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s3,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s3_seq_clipped, view.Sequence());
+ EXPECT_EQ(s3_quals_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s3_tagBases_clipped, view.DeletionTags());
+ EXPECT_EQ(s3_tagQuals_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s3_tagQuals_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(s3_tagQuals_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(s3_frames_clipped, view.IPD().Data());
+ }
+
+ { // s3 - REVERSE
+
+ EXPECT_TRUE(s3_rev.IsMapped());
+ EXPECT_EQ(Strand::REVERSE, s3_rev.AlignedStrand());
+ EXPECT_EQ(504, s3_rev.QueryStart());
+ EXPECT_EQ(508, s3_rev.QueryEnd());
+ EXPECT_EQ(504, s3_rev.AlignedStart()); // queryStart (no soft clips)
+ EXPECT_EQ(508, s3_rev.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(clipStart, s3_rev.ReferenceStart()); // clipStart
+ EXPECT_EQ(clipEnd, s3_rev.ReferenceEnd()); // clipEnd
+
+ EXPECT_EQ(s3_cigar_clipped, s3_rev.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s3_rev,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s3_seq_rev_clipped, view.Sequence());
+ EXPECT_EQ(s3_quals_rev_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s3_tagBases_rev_clipped, view.DeletionTags());
+ EXPECT_EQ(s3_tagQuals_rev_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s3_tagQuals_rev_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(s3_tagQuals_rev_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(s3_frames_rev_clipped, view.IPD().Data());
+ }
+}
+
+TEST(BamRecordClippingTest, ClipToReference_WithSoftClips)
+{
+ const Position qStart = 500;
+ const Position qEnd = 515;
+ const string seq = "TTAACCGTTAGCAAA";
+ const string quals = "--?]?]?]?]?*+++";
+ const string tagBases = "TTAACCGTTAGCAAA";
+ const string tagQuals = "--?]?]?]?]?*+++";
+ const string tagQuals_rev = "+++*?]?]?]?]?--";
+ const f_data frames = { 40, 40, 10, 10, 20, 20, 30, 40, 40, 10, 30, 20, 10, 10, 10 };
+
+ const int32_t tId = 0;
+ const Position tPos = 100;
+ const uint8_t mapQual = 80;
+
+ const Position clipStart = 102;
+ const Position clipEnd = 107;
+
+ const string seq_rev = "TTTGCTAACGGTTAA";
+ const string quals_rev = "+++*?]?]?]?]?--";
+ const f_data frames_rev = { 10, 10, 10, 20, 30, 10, 40, 40, 30, 20, 20, 10, 10, 40, 40 };
+
+ const string s1_cigar = "2S10=3S";
+ const string s1_cigar_clipped = "5=";
+ const string s1_seq_clipped = "CCGTT";
+ const string s1_quals_clipped = "?]?]?";
+ const string s1_tagBases_clipped = s1_seq_clipped;
+ const string s1_tagQuals_clipped = s1_quals_clipped;
+ const f_data s1_frames_clipped = { 20, 20, 30, 40, 40 };
+ const string s1_seq_rev_clipped = "CTAAC";
+ const string s1_quals_rev_clipped = "?]?]?";
+ const string s1_tagBases_rev_clipped = s1_seq_rev_clipped;
+ const string s1_tagQuals_rev_clipped = s1_quals_rev_clipped;
+ const f_data s1_frames_rev_clipped = { 30, 10, 40, 40, 30 };
+
+ const string s2_cigar = "2S5=3D5=3S";
+ const string s2_cigar_clipped = "3=2D";
+ const string s2_seq_clipped = "CCG";
+ const string s2_quals_clipped = "?]?";
+ const string s2_tagBases_clipped = s2_seq_clipped;
+ const string s2_tagQuals_clipped = s2_quals_clipped;
+ const f_data s2_frames_clipped = { 20, 20, 30 };
+ const string s2_seq_rev_clipped = "CTA";
+ const string s2_quals_rev_clipped = "?]?";
+ const string s2_tagBases_rev_clipped = s2_seq_rev_clipped;
+ const string s2_tagQuals_rev_clipped = s2_quals_rev_clipped;
+ const f_data s2_frames_rev_clipped = { 30, 10, 40 };
+
+ const string s3_cigar = "2S4=1D2I2D4=3S";
+ const string s3_cigar_clipped = "2=1D2I2D";
+ const string s3_seq_clipped = "CCGT";
+ const string s3_quals_clipped = "?]?]";
+ const string s3_tagBases_clipped = s3_seq_clipped;
+ const string s3_tagQuals_clipped = s3_quals_clipped;
+ const f_data s3_frames_clipped = { 20, 20, 30, 40 };
+ const string s3_seq_rev_clipped = "CTAA";
+ const string s3_quals_rev_clipped = "?]?]";
+ const string s3_tagBases_rev_clipped = s3_seq_rev_clipped;
+ const string s3_tagQuals_rev_clipped = s3_quals_rev_clipped;
+ const f_data s3_frames_rev_clipped = { 30, 10, 40, 40 };
+
+ const BamRecord prototype = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames,
+ seq, tagBases, tagQuals, frames);
+ BamRecord s0 = prototype;
+ BamRecord s1 = prototype.Mapped(tId, tPos, Strand::FORWARD, s1_cigar, mapQual);
+ BamRecord s2 = prototype.Mapped(tId, tPos, Strand::FORWARD, s2_cigar, mapQual);
+ BamRecord s3 = prototype.Mapped(tId, tPos, Strand::FORWARD, s3_cigar, mapQual);
+ BamRecord s1_rev = prototype.Mapped(tId, tPos, Strand::REVERSE, s1_cigar, mapQual);
+ BamRecord s2_rev = prototype.Mapped(tId, tPos, Strand::REVERSE, s2_cigar, mapQual);
+ BamRecord s3_rev = prototype.Mapped(tId, tPos, Strand::REVERSE, s3_cigar, mapQual);
+
+ // sanity checks before clipping
+ EXPECT_FALSE(s0.IsMapped());
+
+ EXPECT_TRUE(s1.IsMapped());
+ EXPECT_EQ(500, s1.QueryStart()); // queryStart
+ EXPECT_EQ(515, s1.QueryEnd()); // queryStart + seqLength
+ EXPECT_EQ(502, s1.AlignedStart()); // queryStart + 2S
+ EXPECT_EQ(512, s1.AlignedEnd()); // alignedStart + 10=
+ EXPECT_EQ(tPos, s1.ReferenceStart()); // tPos
+ EXPECT_EQ(tPos + 10, s1.ReferenceEnd()); // tPos + 10=
+
+ EXPECT_TRUE(s1_rev.IsMapped());
+ EXPECT_EQ(500, s1_rev.QueryStart()); // queryStart
+ EXPECT_EQ(515, s1_rev.QueryEnd()); // queryStart + seqLength
+ EXPECT_EQ(503, s1_rev.AlignedStart()); // queryStart + 3S
+ EXPECT_EQ(513, s1_rev.AlignedEnd()); // alignedStart + 10=
+ EXPECT_EQ(tPos, s1_rev.ReferenceStart()); // tPos
+ EXPECT_EQ(tPos + 10, s1_rev.ReferenceEnd()); // tPos + 10=
+
+ EXPECT_TRUE(s2.IsMapped());
+ EXPECT_EQ(500, s2.QueryStart()); // queryStart
+ EXPECT_EQ(515, s2.QueryEnd()); // queryStart + seqLength
+ EXPECT_EQ(502, s2.AlignedStart()); // queryStart + 2S
+ EXPECT_EQ(512, s2.AlignedEnd()); // alignedStart + 5=5=
+ EXPECT_EQ(tPos, s2.ReferenceStart()); // tPos
+ EXPECT_EQ(tPos + 13, s2.ReferenceEnd()); // tPos + 5=3D5=
+
+ EXPECT_TRUE(s2_rev.IsMapped());
+ EXPECT_EQ(500, s2_rev.QueryStart()); // queryStart
+ EXPECT_EQ(515, s2_rev.QueryEnd()); // queryStart + seqLength
+ EXPECT_EQ(503, s2_rev.AlignedStart()); // queryStart + S
+ EXPECT_EQ(513, s2_rev.AlignedEnd()); // alignedStart + 5=5=
+ EXPECT_EQ(tPos, s2_rev.ReferenceStart()); // tPos
+ EXPECT_EQ(tPos + 13, s2_rev.ReferenceEnd()); // tPos + 5=3D5=
+
+ EXPECT_TRUE(s3.IsMapped());
+ EXPECT_EQ(500, s3.QueryStart()); // queryStart
+ EXPECT_EQ(515, s3.QueryEnd()); // queryStart + seqLength
+ EXPECT_EQ(502, s3.AlignedStart()); // queryStart + 2S
+ EXPECT_EQ(512, s3.AlignedEnd()); // alignedStart + 4=2I4=
+ EXPECT_EQ(tPos, s3.ReferenceStart()); // tPos
+ EXPECT_EQ(tPos + 11, s3.ReferenceEnd()); // tPos + 4=1D2D4=
+
+ EXPECT_TRUE(s3_rev.IsMapped());
+ EXPECT_EQ(500, s3_rev.QueryStart()); // queryStart
+ EXPECT_EQ(515, s3_rev.QueryEnd()); // queryStart + seqLength
+ EXPECT_EQ(503, s3_rev.AlignedStart()); // queryStart + 2S
+ EXPECT_EQ(513, s3_rev.AlignedEnd()); // alignedStart + 4=2I4=
+ EXPECT_EQ(tPos, s3_rev.ReferenceStart()); // tPos
+ EXPECT_EQ(tPos + 11, s3_rev.ReferenceEnd()); // tPos + 4=1D2D4=
+
+ s0.Clip(ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+ s1.Clip(ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+ s2.Clip(ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+ s3.Clip(ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+ s1_rev.Clip(ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+ s2_rev.Clip(ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+ s3_rev.Clip(ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+
+ { // s0 - no clipping should have been done to unmapped record
+
+ EXPECT_FALSE(s0.IsMapped());
+ EXPECT_EQ(prototype.QueryStart(), s0.QueryStart());
+ EXPECT_EQ(prototype.QueryEnd(), s0.QueryEnd());
+ EXPECT_EQ(prototype.AlignedStart(), s0.AlignedStart());
+ EXPECT_EQ(prototype.AlignedEnd(), s0.AlignedEnd());
+ EXPECT_EQ(prototype.ReferenceStart(), s0.ReferenceStart());
+ EXPECT_EQ(prototype.ReferenceEnd(), s0.ReferenceEnd());
+
+ const BamRecordView protoView
+ {
+ prototype,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ const BamRecordView view
+ {
+ s0,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(protoView.Sequence(), view.Sequence());
+ EXPECT_EQ(protoView.Qualities(), view.Qualities());
+ EXPECT_EQ(protoView.DeletionTags(), view.DeletionTags());
+ EXPECT_EQ(protoView.DeletionQVs(), view.DeletionQVs());
+ EXPECT_EQ(protoView.LabelQVs(), view.LabelQVs());
+ EXPECT_EQ(protoView.AltLabelQVs(), view.AltLabelQVs());
+ EXPECT_EQ(protoView.IPD(), view.IPD());
+ }
+
+ { // s1 - FORWARD
+
+ EXPECT_TRUE(s1.IsMapped());
+ EXPECT_EQ(Strand::FORWARD, s1.AlignedStrand());
+ EXPECT_EQ(504, s1.QueryStart()); // new queryStart
+ EXPECT_EQ(509, s1.QueryEnd()); // queryStart + new seqLength
+ EXPECT_EQ(504, s1.AlignedStart()); // queryStart (no soft clips remaining)
+ EXPECT_EQ(509, s1.AlignedEnd()); // alignStart + new seqLength
+ EXPECT_EQ(clipStart, s1.ReferenceStart()); // clipStart
+ EXPECT_EQ(clipEnd, s1.ReferenceEnd()); // clipEnd
+
+ EXPECT_EQ(s1_cigar_clipped, s1.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s1,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s1_seq_clipped, view.Sequence());
+ EXPECT_EQ(s1_quals_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s1_tagBases_clipped, view.DeletionTags());
+ EXPECT_EQ(s1_tagQuals_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s1_tagQuals_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(s1_tagQuals_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(s1_frames_clipped, view.IPD().Data());
+ }
+
+ { // s1 - REVERSE
+
+ EXPECT_TRUE(s1_rev.IsMapped());
+ EXPECT_EQ(Strand::REVERSE, s1_rev.AlignedStrand());
+ EXPECT_EQ(506, s1_rev.QueryStart()); // new queryStart
+ EXPECT_EQ(511, s1_rev.QueryEnd()); // queryStart + new seqLength
+ EXPECT_EQ(506, s1_rev.AlignedStart()); // queryStart (no soft clips remaining)
+ EXPECT_EQ(511, s1_rev.AlignedEnd()); // alignStart + new seqLength
+ EXPECT_EQ(clipStart, s1_rev.ReferenceStart()); // clipStart
+ EXPECT_EQ(clipEnd, s1_rev.ReferenceEnd()); // clipEnd
+
+ EXPECT_EQ(s1_cigar_clipped, s1_rev.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s1_rev,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s1_seq_rev_clipped, view.Sequence());
+ EXPECT_EQ(s1_quals_rev_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s1_tagBases_rev_clipped, view.DeletionTags());
+ EXPECT_EQ(s1_tagQuals_rev_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s1_tagQuals_rev_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(s1_tagQuals_rev_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(s1_frames_rev_clipped, view.IPD().Data());
+ }
+
+ { // s2 - FORWARD
+
+ EXPECT_TRUE(s2.IsMapped());
+ EXPECT_EQ(Strand::FORWARD, s2.AlignedStrand());
+ EXPECT_EQ(504, s2.QueryStart());
+ EXPECT_EQ(507, s2.QueryEnd());
+ EXPECT_EQ(504, s2.AlignedStart()); // queryStart (no soft clips)
+ EXPECT_EQ(507, s2.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(clipStart, s2.ReferenceStart()); // clipStart
+ EXPECT_EQ(clipEnd, s2.ReferenceEnd()); // clipEnd
+
+ EXPECT_EQ(s2_cigar_clipped, s2.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s2,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s2_seq_clipped, view.Sequence());
+ EXPECT_EQ(s2_quals_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s2_tagBases_clipped, view.DeletionTags());
+ EXPECT_EQ(s2_tagQuals_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s2_tagQuals_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(s2_tagQuals_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(s2_frames_clipped, view.IPD().Data());
+ }
+
+ { // s2 - REVERSE
+
+ EXPECT_TRUE(s2_rev.IsMapped());
+ EXPECT_EQ(Strand::REVERSE, s2_rev.AlignedStrand());
+ EXPECT_EQ(508, s2_rev.QueryStart()); // new queryStart
+ EXPECT_EQ(511, s2_rev.QueryEnd()); // queryStart + new seqLength
+ EXPECT_EQ(508, s2_rev.AlignedStart()); // queryStart (no soft clips remaining)
+ EXPECT_EQ(511, s2_rev.AlignedEnd()); // alignStart + new seqLength
+ EXPECT_EQ(clipStart, s2_rev.ReferenceStart()); // clipStart
+ EXPECT_EQ(clipEnd, s2_rev.ReferenceEnd()); // clipEnd
+
+ EXPECT_EQ(s2_cigar_clipped, s2_rev.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s2_rev,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s2_seq_rev_clipped, view.Sequence());
+ EXPECT_EQ(s2_quals_rev_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s2_tagBases_rev_clipped, view.DeletionTags());
+ EXPECT_EQ(s2_tagQuals_rev_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s2_tagQuals_rev_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(s2_tagQuals_rev_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(s2_frames_rev_clipped, view.IPD().Data());
+ }
+
+ { // s3 - FORWARD
+ EXPECT_TRUE(s3.IsMapped());
+ EXPECT_EQ(Strand::FORWARD, s3.AlignedStrand());
+ EXPECT_EQ(504, s3.QueryStart());
+ EXPECT_EQ(508, s3.QueryEnd());
+ EXPECT_EQ(504, s3.AlignedStart()); // queryStart (no soft clips)
+ EXPECT_EQ(508, s3.AlignedEnd()); // alignStart + seqLength
+ EXPECT_EQ(clipStart, s3.ReferenceStart()); // clipStart
+ EXPECT_EQ(clipEnd, s3.ReferenceEnd()); // clipEnd
+
+ EXPECT_EQ(s3_cigar_clipped, s3.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s3,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s3_seq_clipped, view.Sequence());
+ EXPECT_EQ(s3_quals_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s3_tagBases_clipped, view.DeletionTags());
+ EXPECT_EQ(s3_tagQuals_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s3_tagQuals_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(s3_tagQuals_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(s3_frames_clipped, view.IPD().Data());
+ }
+
+ { // s3 - REVERSE
+ EXPECT_TRUE(s3_rev.IsMapped());
+ EXPECT_EQ(Strand::REVERSE, s3_rev.AlignedStrand());
+ EXPECT_EQ(507, s3_rev.QueryStart()); // new queryStart
+ EXPECT_EQ(511, s3_rev.QueryEnd()); // queryStart + new seqLength
+ EXPECT_EQ(507, s3_rev.AlignedStart()); // queryStart (no soft clips remaining)
+ EXPECT_EQ(511, s3_rev.AlignedEnd()); // alignStart + new seqLength
+ EXPECT_EQ(clipStart, s3_rev.ReferenceStart()); // clipStart
+ EXPECT_EQ(clipEnd, s3_rev.ReferenceEnd()); // clipEnd
+
+ EXPECT_EQ(s3_cigar_clipped, s3_rev.CigarData().ToStdString());
+
+ const BamRecordView view
+ {
+ s3_rev,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(s3_seq_rev_clipped, view.Sequence());
+ EXPECT_EQ(s3_quals_rev_clipped, view.Qualities().Fastq());
+ EXPECT_EQ(s3_tagBases_rev_clipped, view.DeletionTags());
+ EXPECT_EQ(s3_tagQuals_rev_clipped, view.DeletionQVs().Fastq());
+ EXPECT_EQ(s3_tagQuals_rev_clipped, view.LabelQVs().Fastq());
+ EXPECT_EQ(s3_tagQuals_rev_clipped, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(s3_frames_rev_clipped, view.IPD().Data());
+ }
+}
+
+// Maps a 10-base record to the forward strand, takes a query-clipped copy via
+// the member function BamRecord::Clipped(), and checks the copy's coordinates,
+// CIGAR, and per-base data.
+TEST(BamRecordClippingTest, ClippedToQueryCopy)
+{
+    // unclipped input
+    const Position qStart = 500;
+    const Position qEnd = 510;
+    const string seq = "AACCGTTAGC";
+    const string quals = "?]?]?]?]?*";
+    const string tagBases = seq;
+    const string tagQuals = quals;
+    const f_data frames = { 10, 10, 20, 20, 30, 40, 40, 10, 30, 20 };
+
+    // mapping parameters
+    const int32_t tId = 0;
+    const Position tPos = 100;
+    const uint8_t mapQual = 80;
+    const string cigar = "4=1D2I2D4=";
+
+    // clip window (query coordinates): drops 2 leading bases and 1 trailing base
+    const Position clipStart = 502;
+    const Position clipEnd = 509;
+
+    // expected contents of the clipped copy
+    const string expectedCigar = "2=1D2I2D3=";
+    const string expectedSeq = "CCGTTAG";
+    const string expectedQuals = "?]?]?]?";
+    const string expectedTagBases = expectedSeq;
+    const string expectedTagQuals = expectedQuals;
+    const f_data expectedFrames = { 20, 20, 30, 40, 40, 10, 30 };
+
+    BamRecord mapped = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames,
+                                         seq, tagBases, tagQuals, frames);
+    mapped.Map(tId, tPos, Strand::FORWARD, cigar, mapQual);
+
+    BamRecord clipped = mapped.Clipped(ClipType::CLIP_TO_QUERY, clipStart, clipEnd);
+
+    // coordinates
+    EXPECT_TRUE(clipped.IsMapped());
+    EXPECT_EQ(Strand::FORWARD, clipped.AlignedStrand());
+    EXPECT_EQ(clipStart, clipped.QueryStart());
+    EXPECT_EQ(clipEnd, clipped.QueryEnd());
+    EXPECT_EQ(clipStart, clipped.AlignedStart());   // same as query start: CIGAR has no soft clips
+    EXPECT_EQ(clipEnd, clipped.AlignedEnd());       // aligned start + 7 remaining bases
+    EXPECT_EQ(102, clipped.ReferenceStart());       // tPos + 2 clipped leading matches
+    EXPECT_EQ(110, clipped.ReferenceEnd());         // reference start + 5 matches + 3 deleted positions
+
+    EXPECT_EQ(expectedCigar, clipped.CigarData().ToStdString());
+
+    // per-base data
+    const BamRecordView view{ clipped, Orientation::GENOMIC, false, false, PulseBehavior::ALL };
+
+    EXPECT_EQ(expectedSeq, view.Sequence());
+    EXPECT_EQ(expectedQuals, view.Qualities().Fastq());
+    EXPECT_EQ(expectedTagBases, view.DeletionTags());
+    EXPECT_EQ(expectedTagQuals, view.DeletionQVs().Fastq());
+    EXPECT_EQ(expectedTagQuals, view.LabelQVs().Fastq());
+    EXPECT_EQ(expectedTagQuals, view.AltLabelQVs().Fastq());
+    EXPECT_EQ(expectedFrames, view.IPD().Data());
+}
+
+// Maps a 10-base record to the forward strand, takes a reference-clipped copy
+// via the member function BamRecord::Clipped(), and checks the copy's
+// coordinates, CIGAR, and per-base data.
+TEST(BamRecordClippingTest, ClippedToReferenceCopy)
+{
+    // unclipped input
+    const Position qStart = 500;
+    const Position qEnd = 510;
+    const string seq = "AACCGTTAGC";
+    const string quals = "?]?]?]?]?*";
+    const string tagBases = seq;
+    const string tagQuals = quals;
+    const f_data frames = { 10, 10, 20, 20, 30, 40, 40, 10, 30, 20 };
+
+    // mapping parameters
+    const int32_t tId = 0;
+    const Position tPos = 100;
+    const uint8_t mapQual = 80;
+    const string cigar = "4=1D2I2D4=";
+
+    // clip window (reference coordinates)
+    const Position clipStart = 102;
+    const Position clipEnd = 107;
+
+    // expected contents of the clipped copy: the first two matches and the
+    // trailing "4=" fall outside the window
+    const string expectedCigar = "2=1D2I2D";
+    const string expectedSeq = "CCGT";
+    const string expectedQuals = "?]?]";
+    const string expectedTagBases = "CCGT";
+    const string expectedTagQuals = "?]?]";
+    const f_data expectedFrames = { 20, 20, 30, 40 };
+
+    BamRecord mapped = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames,
+                                         seq, tagBases, tagQuals, frames);
+    mapped.Map(tId, tPos, Strand::FORWARD, cigar, mapQual);
+
+    const BamRecord clipped = mapped.Clipped(ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+
+    // coordinates
+    EXPECT_TRUE(clipped.IsMapped());
+    EXPECT_EQ(Strand::FORWARD, clipped.AlignedStrand());
+    EXPECT_EQ(502, clipped.QueryStart());               // qStart + 2 clipped leading bases
+    EXPECT_EQ(506, clipped.QueryEnd());                 // query start + 4 remaining bases
+    EXPECT_EQ(502, clipped.AlignedStart());             // same as query start: CIGAR has no soft clips
+    EXPECT_EQ(506, clipped.AlignedEnd());               // aligned start + 4 remaining bases
+    EXPECT_EQ(clipStart, clipped.ReferenceStart());
+    EXPECT_EQ(clipEnd, clipped.ReferenceEnd());
+
+    EXPECT_EQ(expectedCigar, clipped.CigarData().ToStdString());
+
+    // per-base data
+    const BamRecordView view{ clipped, Orientation::GENOMIC, false, false, PulseBehavior::ALL };
+
+    EXPECT_EQ(expectedSeq, view.Sequence());
+    EXPECT_EQ(expectedQuals, view.Qualities().Fastq());
+    EXPECT_EQ(expectedTagBases, view.DeletionTags());
+    EXPECT_EQ(expectedTagQuals, view.DeletionQVs().Fastq());
+    EXPECT_EQ(expectedTagQuals, view.LabelQVs().Fastq());
+    EXPECT_EQ(expectedTagQuals, view.AltLabelQVs().Fastq());
+    EXPECT_EQ(expectedFrames, view.IPD().Data());
+}
+
+// Same scenario as the member-function query-clipping test, but the clipped
+// copy is produced by the static overload BamRecord::Clipped(record, ...).
+TEST(BamRecordClippingTest, StaticClippedToQuery)
+{
+    // unclipped input
+    const Position qStart = 500;
+    const Position qEnd = 510;
+    const string seq = "AACCGTTAGC";
+    const string quals = "?]?]?]?]?*";
+    const string tagBases = seq;
+    const string tagQuals = quals;
+    const f_data frames = { 10, 10, 20, 20, 30, 40, 40, 10, 30, 20 };
+
+    // mapping parameters
+    const int32_t tId = 0;
+    const Position tPos = 100;
+    const uint8_t mapQual = 80;
+    const string cigar = "4=1D2I2D4=";
+
+    // clip window (query coordinates): drops 2 leading bases and 1 trailing base
+    const Position clipStart = 502;
+    const Position clipEnd = 509;
+
+    // expected contents of the clipped copy
+    const string expectedCigar = "2=1D2I2D3=";
+    const string expectedSeq = "CCGTTAG";
+    const string expectedQuals = "?]?]?]?";
+    const string expectedTagBases = expectedSeq;
+    const string expectedTagQuals = expectedQuals;
+    const f_data expectedFrames = { 20, 20, 30, 40, 40, 10, 30 };
+
+    BamRecord mapped = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames,
+                                         seq, tagBases, tagQuals, frames);
+    mapped.Map(tId, tPos, Strand::FORWARD, cigar, mapQual);
+
+    const BamRecord clipped = BamRecord::Clipped(mapped, ClipType::CLIP_TO_QUERY, clipStart, clipEnd);
+
+    // coordinates
+    EXPECT_TRUE(clipped.IsMapped());
+    EXPECT_EQ(Strand::FORWARD, clipped.AlignedStrand());
+    EXPECT_EQ(clipStart, clipped.QueryStart());
+    EXPECT_EQ(clipEnd, clipped.QueryEnd());
+    EXPECT_EQ(clipStart, clipped.AlignedStart());   // same as query start: CIGAR has no soft clips
+    EXPECT_EQ(clipEnd, clipped.AlignedEnd());       // aligned start + 7 remaining bases
+    EXPECT_EQ(102, clipped.ReferenceStart());       // tPos + 2 clipped leading matches
+    EXPECT_EQ(110, clipped.ReferenceEnd());         // reference start + 5 matches + 3 deleted positions
+
+    EXPECT_EQ(expectedCigar, clipped.CigarData().ToStdString());
+
+    // per-base data
+    const BamRecordView view{ clipped, Orientation::GENOMIC, false, false, PulseBehavior::ALL };
+
+    EXPECT_EQ(expectedSeq, view.Sequence());
+    EXPECT_EQ(expectedQuals, view.Qualities().Fastq());
+    EXPECT_EQ(expectedTagBases, view.DeletionTags());
+    EXPECT_EQ(expectedTagQuals, view.DeletionQVs().Fastq());
+    EXPECT_EQ(expectedTagQuals, view.LabelQVs().Fastq());
+    EXPECT_EQ(expectedTagQuals, view.AltLabelQVs().Fastq());
+    EXPECT_EQ(expectedFrames, view.IPD().Data());
+}
+
+// Same scenario as the member-function reference-clipping test, but the
+// clipped copy is produced by the static overload BamRecord::Clipped(record, ...).
+TEST(BamRecordClippingTest, StaticClippedToReference)
+{
+    // unclipped input
+    const Position qStart = 500;
+    const Position qEnd = 510;
+    const string seq = "AACCGTTAGC";
+    const string quals = "?]?]?]?]?*";
+    const string tagBases = seq;
+    const string tagQuals = quals;
+    const f_data frames = { 10, 10, 20, 20, 30, 40, 40, 10, 30, 20 };
+
+    // mapping parameters
+    const int32_t tId = 0;
+    const Position tPos = 100;
+    const uint8_t mapQual = 80;
+    const string cigar = "4=1D2I2D4=";
+
+    // clip window (reference coordinates)
+    const Position clipStart = 102;
+    const Position clipEnd = 107;
+
+    // expected contents of the clipped copy: the first two matches and the
+    // trailing "4=" fall outside the window
+    const string expectedCigar = "2=1D2I2D";
+    const string expectedSeq = "CCGT";
+    const string expectedQuals = "?]?]";
+    const string expectedTagBases = "CCGT";
+    const string expectedTagQuals = "?]?]";
+    const f_data expectedFrames = { 20, 20, 30, 40 };
+
+    BamRecord mapped = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames,
+                                         seq, tagBases, tagQuals, frames);
+    mapped.Map(tId, tPos, Strand::FORWARD, cigar, mapQual);
+
+    const BamRecord clipped = BamRecord::Clipped(mapped, ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+
+    // coordinates
+    EXPECT_TRUE(clipped.IsMapped());
+    EXPECT_EQ(Strand::FORWARD, clipped.AlignedStrand());
+    EXPECT_EQ(502, clipped.QueryStart());               // qStart + 2 clipped leading bases
+    EXPECT_EQ(506, clipped.QueryEnd());                 // query start + 4 remaining bases
+    EXPECT_EQ(502, clipped.AlignedStart());             // same as query start: CIGAR has no soft clips
+    EXPECT_EQ(506, clipped.AlignedEnd());               // aligned start + 4 remaining bases
+    EXPECT_EQ(clipStart, clipped.ReferenceStart());
+    EXPECT_EQ(clipEnd, clipped.ReferenceEnd());
+
+    EXPECT_EQ(expectedCigar, clipped.CigarData().ToStdString());
+
+    // per-base data
+    const BamRecordView view{ clipped, Orientation::GENOMIC, false, false, PulseBehavior::ALL };
+
+    EXPECT_EQ(expectedSeq, view.Sequence());
+    EXPECT_EQ(expectedQuals, view.Qualities().Fastq());
+    EXPECT_EQ(expectedTagBases, view.DeletionTags());
+    EXPECT_EQ(expectedTagQuals, view.DeletionQVs().Fastq());
+    EXPECT_EQ(expectedTagQuals, view.LabelQVs().Fastq());
+    EXPECT_EQ(expectedTagQuals, view.AltLabelQVs().Fastq());
+    EXPECT_EQ(expectedFrames, view.IPD().Data());
+}
+
+// Checks BamRecord::CigarData() with and without its boolean argument on
+// records whose CIGAR carries hard (H) and soft (S) clips at both ends.
+// With no argument the CIGAR comes back exactly as supplied to Map(); with
+// `true` the H and S operations are expected to be absent from the result.
+// Both a forward-mapped and a reverse-mapped record are verified.
+TEST(BamRecordTest, ClipCigarData)
+{
+    const Position qStart = 500;
+    const Position qEnd = 515;
+    const string seq = "TTAACCGTTAGCAAA";
+    const string quals = "--?]?]?]?]?*+++";
+    const string tagBases = "TTAACCGTTAGCAAA";
+    const string tagQuals = "--?]?]?]?]?*+++";
+    const f_data frames = { 40, 40, 10, 10, 20, 20, 30, 40, 40, 10, 30, 20, 10, 10, 10 };
+    const uint8_t mapQual = 80;
+    BamRecord s3 = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames,
+                                     seq, tagBases, tagQuals, frames);
+    BamRecord s3_rev = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames,
+                                         seq, tagBases, tagQuals, frames);
+
+    const string s3_cigar = "5H2S4=1D2I2D4=3S7H";
+    const string s3_cigar_noClips = "4=1D2I2D4=";
+    s3.Map(0, 100, Strand::FORWARD, s3_cigar, mapQual);
+    s3_rev.Map(0, 100, Strand::REVERSE, s3_cigar, mapQual);
+
+    // forward strand
+    const Cigar s3_cigar_raw = s3.CigarData();
+    const Cigar s3_cigar_clipped = s3.CigarData(true);
+
+    EXPECT_EQ(s3_cigar, s3_cigar_raw.ToStdString());
+    EXPECT_EQ(s3_cigar_noClips, s3_cigar_clipped.ToStdString());
+
+    // reverse strand - previously mapped but never verified.
+    // NOTE(review): assumes the CIGAR is stored as supplied regardless of
+    // strand (the clipping tests in this file compare forward and reverse
+    // records against the same CIGAR string) - confirm against BamRecord::Map.
+    const Cigar s3_rev_cigar_raw = s3_rev.CigarData();
+    const Cigar s3_rev_cigar_clipped = s3_rev.CigarData(true);
+
+    EXPECT_EQ(s3_cigar, s3_rev_cigar_raw.ToStdString());
+    EXPECT_EQ(s3_cigar_noClips, s3_rev_cigar_clipped.ToStdString());
+}
+
+// Query-clips a mapped record built with tests::MakeCCSRecord(), which is
+// created without explicit query start/end values, so the clip window and the
+// aligned coordinates below are 0-based offsets into the read.
+TEST(BamRecordTest, CCS_ClipToQuery)
+{
+    // unclipped input
+    const string seq = "AACCGTTAGC";
+    const string quals = "?]?]?]?]?*";
+    const string tagBases = seq;
+    const string tagQuals = quals;
+    const f_data frames = { 10, 10, 20, 20, 30, 40, 40, 10, 30, 20 };
+
+    // mapping parameters
+    const int32_t tId = 0;
+    const Position tPos = 100;
+    const uint8_t mapQual = 80;
+    const string cigar = "4=1D2I2D4=";
+
+    // clip window: drops 2 leading bases and 1 trailing base
+    const Position clipStart = 2;
+    const Position clipEnd = 9;
+
+    // expected contents of the clipped copy
+    const string expectedCigar = "2=1D2I2D3=";
+    const string expectedSeq = "CCGTTAG";
+    const string expectedQuals = "?]?]?]?";
+    const string expectedTagBases = expectedSeq;
+    const string expectedTagQuals = expectedQuals;
+    const f_data expectedFrames = { 20, 20, 30, 40, 40, 10, 30 };
+
+    BamRecord mapped = tests::MakeCCSRecord(seq, quals, tagBases, tagQuals, frames,
+                                            seq, tagBases, tagQuals, frames);
+    mapped.Map(tId, tPos, Strand::FORWARD, cigar, mapQual);
+
+    BamRecord clipped = mapped.Clipped(ClipType::CLIP_TO_QUERY, clipStart, clipEnd);
+
+    // coordinates
+    EXPECT_TRUE(clipped.IsMapped());
+    EXPECT_EQ(Strand::FORWARD, clipped.AlignedStrand());
+    EXPECT_EQ(0, clipped.AlignedStart());       // start of the clipped record (no soft clips)
+    EXPECT_EQ(7, clipped.AlignedEnd());         // aligned start + 7 remaining bases
+    EXPECT_EQ(102, clipped.ReferenceStart());   // tPos + 2 clipped leading matches
+    EXPECT_EQ(110, clipped.ReferenceEnd());     // reference start + 5 matches + 3 deleted positions
+
+    EXPECT_EQ(expectedCigar, clipped.CigarData().ToStdString());
+
+    // per-base data
+    const BamRecordView view{ clipped, Orientation::GENOMIC, false, false, PulseBehavior::ALL };
+
+    EXPECT_EQ(expectedSeq, view.Sequence());
+    EXPECT_EQ(expectedQuals, view.Qualities().Fastq());
+    EXPECT_EQ(expectedTagBases, view.DeletionTags());
+    EXPECT_EQ(expectedTagQuals, view.DeletionQVs().Fastq());
+    EXPECT_EQ(expectedTagQuals, view.LabelQVs().Fastq());
+    EXPECT_EQ(expectedTagQuals, view.AltLabelQVs().Fastq());
+    EXPECT_EQ(expectedFrames, view.IPD().Data());
+}
+
+// Reference-clips a mapped record built with tests::MakeCCSRecord(), using the
+// static overload BamRecord::Clipped(record, ...). The record is created
+// without explicit query start/end values, so the aligned coordinates below
+// are 0-based offsets into the clipped read.
+TEST(BamRecordTest, CCS_ClipToReference)
+{
+    // unclipped input
+    const string seq = "AACCGTTAGC";
+    const string quals = "?]?]?]?]?*";
+    const string tagBases = seq;
+    const string tagQuals = quals;
+    const f_data frames = { 10, 10, 20, 20, 30, 40, 40, 10, 30, 20 };
+
+    // mapping parameters
+    const int32_t tId = 0;
+    const Position tPos = 100;
+    const uint8_t mapQual = 80;
+    const string cigar = "4=1D2I2D4=";
+
+    // clip window (reference coordinates)
+    const Position clipStart = 102;
+    const Position clipEnd = 107;
+
+    // expected contents of the clipped copy: the first two matches and the
+    // trailing "4=" fall outside the window
+    const string expectedCigar = "2=1D2I2D";
+    const string expectedSeq = "CCGT";
+    const string expectedQuals = "?]?]";
+    const string expectedTagBases = "CCGT";
+    const string expectedTagQuals = "?]?]";
+    const f_data expectedFrames = { 20, 20, 30, 40 };
+
+    BamRecord mapped = tests::MakeCCSRecord(seq, quals, tagBases, tagQuals, frames,
+                                            seq, tagBases, tagQuals, frames);
+    mapped.Map(tId, tPos, Strand::FORWARD, cigar, mapQual);
+
+    const BamRecord clipped = BamRecord::Clipped(mapped, ClipType::CLIP_TO_REFERENCE, clipStart, clipEnd);
+
+    // coordinates
+    EXPECT_TRUE(clipped.IsMapped());
+    EXPECT_EQ(Strand::FORWARD, clipped.AlignedStrand());
+    EXPECT_EQ(0, clipped.AlignedStart());               // record start (no soft clips)
+    EXPECT_EQ(4, clipped.AlignedEnd());                 // aligned start + 4 remaining bases
+    EXPECT_EQ(clipStart, clipped.ReferenceStart());
+    EXPECT_EQ(clipEnd, clipped.ReferenceEnd());
+
+    EXPECT_EQ(expectedCigar, clipped.CigarData().ToStdString());
+
+    // per-base data
+    const BamRecordView view{ clipped, Orientation::GENOMIC, false, false, PulseBehavior::ALL };
+
+    EXPECT_EQ(expectedSeq, view.Sequence());
+    EXPECT_EQ(expectedQuals, view.Qualities().Fastq());
+    EXPECT_EQ(expectedTagBases, view.DeletionTags());
+    EXPECT_EQ(expectedTagQuals, view.DeletionQVs().Fastq());
+    EXPECT_EQ(expectedTagQuals, view.LabelQVs().Fastq());
+    EXPECT_EQ(expectedTagQuals, view.AltLabelQVs().Fastq());
+    EXPECT_EQ(expectedFrames, view.IPD().Data());
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/BamRecordImpl.h>
+#include <pbbam/BamTagCodec.h>
+#include <pbbam/Tag.h>
+#include <pbbam/TagCollection.h>
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+namespace tests {
+
+// Deleter for smart pointers that own an htslib bam1_t; releases the record
+// with bam_destroy1(). A null pointer is ignored.
+struct Bam1Deleter
+{
+    void operator()(bam1_t* b) const {
+        if (b)
+            bam_destroy1(b);
+    }
+};
+
+// Builds a BamRecordImpl with every core field set to 42 and three tags
+// attached: "HX" (string flagged as hex), "CA" (uint8_t array), "XY" (int32_t).
+static
+BamRecordImpl CreateBamImpl()
+{
+    BamRecordImpl bam;
+    bam.Bin(42);
+    bam.Flag(42);
+    bam.InsertSize(42);
+    bam.MapQuality(42);
+    bam.MatePosition(42);
+    bam.MateReferenceId(42);
+    bam.Position(42);
+    bam.ReferenceId(42);
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+    bam.Tags(tags);
+
+    return bam;
+}
+
+// Verifies that the length fields of the raw bam1_t behind `bam` agree with
+// the lengths reported through the public accessors (name, CIGAR, sequence,
+// tags), and that their sum matches the raw record's total data length.
+static
+void CheckRawData(const BamRecordImpl& bam)
+{
+    // ensure raw data (lengths at least) matches API-facing data
+
+    // expected values are narrowed explicitly to the types they are compared
+    // against below
+    const uint32_t expectedNameLength  = static_cast<uint32_t>(bam.Name().size() + 1); // +1 for NULL-term
+    const uint32_t expectedNumCigarOps = static_cast<uint32_t>(bam.CigarData().size());
+    const int32_t  expectedSeqLength   = static_cast<int32_t>(bam.Sequence().length());
+    const size_t   expectedTagsLength  = BamTagCodec::Encode(bam.Tags()).size();
+
+    //  Name        CIGAR         Sequence       Quals      Tags
+    // l_qname + (n_cigar * 4) + (l_qseq+1)/2 + l_qseq + << TAGS >>
+
+    const int expectedTotalDataLength = static_cast<int>(expectedNameLength +
+                                                         (expectedNumCigarOps * 4) +
+                                                         (expectedSeqLength+1)/2 +
+                                                         expectedSeqLength +
+                                                         expectedTagsLength);
+
+    EXPECT_TRUE(static_cast<bool>(bam.d_));
+    EXPECT_EQ(expectedNameLength,      bam.d_->core.l_qname);
+    EXPECT_EQ(expectedNumCigarOps,     bam.d_->core.n_cigar);
+    EXPECT_EQ(expectedSeqLength,       bam.d_->core.l_qseq);
+    EXPECT_EQ(expectedTotalDataLength, bam.d_->l_data);
+}
+
+} // namespace tests
+
+// Documents the state of a bam1_t straight out of htslib's bam_init1(), with
+// no BamRecordImpl involved: every core field is zero and no variable-length
+// buffer is attached.
+TEST(BamRecordImplCoreTest, RawDataDefaultValues)
+{
+    PBBAM_SHARED_PTR<bam1_t> rawData(bam_init1(), tests::Bam1Deleter());
+    ASSERT_TRUE(static_cast<bool>(rawData));
+
+    // fixed-length (core) data
+    EXPECT_EQ(0, rawData->core.tid);
+    EXPECT_EQ(0, rawData->core.pos);
+    EXPECT_EQ(0, rawData->core.bin);
+    EXPECT_EQ(0, rawData->core.qual);
+    EXPECT_EQ(0, rawData->core.l_qname);
+    EXPECT_EQ(0, rawData->core.flag);
+    EXPECT_EQ(0, rawData->core.n_cigar);
+    EXPECT_EQ(0, rawData->core.l_qseq);
+    EXPECT_EQ(0, rawData->core.mtid);
+    EXPECT_EQ(0, rawData->core.mpos);
+    EXPECT_EQ(0, rawData->core.isize);
+
+    // variable length data
+    // (pointer is compared against nullptr rather than the integer literal 0)
+    EXPECT_TRUE(rawData->data == nullptr);
+    EXPECT_EQ(0, rawData->l_data);
+    EXPECT_EQ(0, rawData->m_data);
+}
+
+// Checks a default-constructed BamRecordImpl, first through its underlying
+// bam1_t (the d_ member, reachable here because of the PBBAM_TESTING
+// `#define private public` at the top of this file) and then through the
+// public accessors.
+TEST(BamRecordImplCoreTest, DefaultValues)
+{
+    BamRecordImpl record;
+
+    // -------------------------------
+    // raw htslib record
+    // -------------------------------
+
+    const PBBAM_SHARED_PTR<bam1_t> raw = record.d_;
+    ASSERT_TRUE(static_cast<bool>(raw));
+
+    // core fields: the record starts out unmapped, and its name is just the
+    // NULL terminator
+    EXPECT_EQ(-1,  raw->core.tid);
+    EXPECT_EQ(-1,  raw->core.pos);
+    EXPECT_EQ(0,   raw->core.bin);
+    EXPECT_EQ(255, raw->core.qual);
+    EXPECT_EQ(1,   raw->core.l_qname);
+    EXPECT_EQ(BamRecordImpl::UNMAPPED, raw->core.flag);
+    EXPECT_EQ(0,   raw->core.n_cigar);
+    EXPECT_EQ(0,   raw->core.l_qseq);
+    EXPECT_EQ(-1,  raw->core.mtid);
+    EXPECT_EQ(-1,  raw->core.mpos);
+    EXPECT_EQ(0,   raw->core.isize);
+
+    // variable-length buffer: allocated, holding only the name's NULL terminator
+    EXPECT_TRUE(raw->data != nullptr);
+    EXPECT_EQ(1, raw->l_data);
+    EXPECT_EQ(static_cast<int>(0x800), raw->m_data); // revisit if the initial capacity is ever changed or tuned
+
+    // -------------------------------
+    // public accessors
+    // -------------------------------
+
+    EXPECT_EQ(0,   record.Bin());
+    EXPECT_EQ(BamRecordImpl::UNMAPPED, record.Flag());
+    EXPECT_EQ(0,   record.InsertSize());
+    EXPECT_EQ(255, record.MapQuality());
+    EXPECT_EQ(-1,  record.MateReferenceId());
+    EXPECT_EQ(-1,  record.MatePosition());
+    EXPECT_EQ(-1,  record.Position());
+    EXPECT_EQ(-1,  record.ReferenceId());
+    EXPECT_EQ(0,   record.Tags().size());
+
+    // flag-derived predicates
+    EXPECT_FALSE(record.IsDuplicate());
+    EXPECT_FALSE(record.IsFailedQC());
+    EXPECT_FALSE(record.IsFirstMate());
+    EXPECT_FALSE(record.IsMapped());
+    EXPECT_TRUE(record.IsMateMapped());
+    EXPECT_FALSE(record.IsMateReverseStrand());
+    EXPECT_FALSE(record.IsPaired());
+    EXPECT_TRUE(record.IsPrimaryAlignment());
+    EXPECT_FALSE(record.IsProperPair());
+    EXPECT_FALSE(record.IsReverseStrand());
+    EXPECT_FALSE(record.IsSecondMate());
+    EXPECT_FALSE(record.IsSupplementaryAlignment());
+
+    // variable-length fields are all empty
+    const std::string empty;
+    EXPECT_EQ(empty, record.Name());
+    EXPECT_EQ(empty, record.CigarData().ToStdString());
+    EXPECT_EQ(empty, record.Sequence());
+    EXPECT_EQ(empty, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Sets every core field to 42 and attaches three tags, then checks that the
+// values are visible both in the underlying bam1_t (the d_ member, reachable
+// here because of the PBBAM_TESTING `#define private public` at the top of
+// this file) and through the public accessors.
+TEST(BamRecordImplCoreTest, CoreSetters)
+{
+    BamRecordImpl record;
+    record.Bin(42);
+    record.Flag(42);
+    record.InsertSize(42);
+    record.MapQuality(42);
+    record.MatePosition(42);
+    record.MateReferenceId(42);
+    record.Position(42);
+    record.ReferenceId(42);
+
+    TagCollection inputTags;
+    inputTags["HX"] = std::string("1abc75");
+    inputTags["HX"].Modifier(TagModifier::HEX_STRING);
+    inputTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    inputTags["XY"] = static_cast<int32_t>(-42);
+    record.Tags(inputTags); // encodes to 28 bytes
+
+    // -------------------------------
+    // raw htslib record
+    // -------------------------------
+
+    const PBBAM_SHARED_PTR<bam1_t> raw = record.d_;
+    ASSERT_TRUE(static_cast<bool>(raw));
+
+    // core fields
+    EXPECT_EQ(42, raw->core.tid);
+    EXPECT_EQ(42, raw->core.pos);
+    EXPECT_EQ(42, raw->core.bin);
+    EXPECT_EQ(42, raw->core.qual);
+    EXPECT_EQ(1,  raw->core.l_qname); // name is still just the NULL terminator
+    EXPECT_EQ(42, raw->core.flag);
+    EXPECT_EQ(0,  raw->core.n_cigar);
+    EXPECT_EQ(0,  raw->core.l_qseq);
+    EXPECT_EQ(42, raw->core.mtid);
+    EXPECT_EQ(42, raw->core.mpos);
+    EXPECT_EQ(42, raw->core.isize);
+
+    // variable-length buffer
+    EXPECT_TRUE(raw->data != nullptr);
+    EXPECT_EQ(29, raw->l_data);                      // 1 (NULL-terminated empty name) + 28 (tags)
+    EXPECT_EQ(static_cast<int>(0x800), raw->m_data); // revisit if the initial capacity is ever changed or tuned
+
+    // -------------------------------
+    // public accessors
+    // -------------------------------
+
+    EXPECT_EQ(42, record.Bin());
+    EXPECT_EQ(42, record.Flag());
+    EXPECT_EQ(42, record.InsertSize());
+    EXPECT_EQ(42, record.MapQuality());
+    EXPECT_EQ(42, record.MateReferenceId());
+    EXPECT_EQ(42, record.MatePosition());
+    EXPECT_EQ(42, record.Position());
+    EXPECT_EQ(42, record.ReferenceId());
+
+    // tags read back with the values and modifier that were set
+    const TagCollection& storedTags = record.Tags();
+
+    EXPECT_TRUE(storedTags.at("HX").HasModifier(TagModifier::HEX_STRING));
+    EXPECT_EQ(std::string("1abc75"), storedTags.at("HX").ToString());
+    EXPECT_EQ(static_cast<int32_t>(-42), storedTags.at("XY").ToInt32());
+    EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), storedTags.at("CA").ToUInt8Array());
+}
+
+// Builds a bam1_t by hand, wraps it with BamRecordImpl::FromRawData(), and
+// checks that (1) the source record is left intact, (2) the new record exposes
+// the same values, and (3) a later change to the source does not show up in
+// the new record, i.e. the data was deep-copied.
+TEST(BamRecordImplCoreTest, DeepCopyFromRawData)
+{
+    // hand-built source record
+    PBBAM_SHARED_PTR<bam1_t> source(bam_init1(), tests::Bam1Deleter());
+    ASSERT_TRUE(static_cast<bool>(source));
+
+    source->core.tid   = 42;
+    source->core.pos   = 42;
+    source->core.bin   = 42;
+    source->core.qual  = 42;
+    source->core.flag  = 42;
+    source->core.mtid  = 42;
+    source->core.mpos  = 42;
+    source->core.isize = 42;
+
+    // append an int32 tag "XY"; the value is copied into a mutable byte buffer
+    // first, and that buffer is what gets handed to bam_aux_append()
+    const int32_t x = 42;
+    const char* const xBytes = static_cast<const char*>(static_cast<const void*>(&x));
+    char valueBytes[sizeof x];
+    std::copy(xBytes, xBytes + sizeof x, valueBytes);
+    bam_aux_append(source.get(), "XY", 'i', sizeof(x), reinterpret_cast<uint8_t*>(&valueBytes[0]));
+
+    // source record holds what was just written
+    EXPECT_EQ(42, source->core.tid);
+    EXPECT_EQ(42, source->core.pos);
+    EXPECT_EQ(42, source->core.bin);
+    EXPECT_EQ(42, source->core.qual);
+    EXPECT_EQ(0,  source->core.l_qname);
+    EXPECT_EQ(42, source->core.flag);
+    EXPECT_EQ(0,  source->core.n_cigar);
+    EXPECT_EQ(0,  source->core.l_qseq);
+    EXPECT_EQ(42, source->core.mtid);
+    EXPECT_EQ(42, source->core.mpos);
+    EXPECT_EQ(42, source->core.isize);
+    const int32_t fetchedX = bam_aux2i( bam_aux_get(source.get(), "XY") );
+    EXPECT_EQ(42, fetchedX);
+
+    // static "ctor"
+    BamRecordImpl record = BamRecordImpl::FromRawData(source);
+
+    // source record is unchanged by the conversion
+    EXPECT_EQ(42, source->core.tid);
+    EXPECT_EQ(42, source->core.pos);
+    EXPECT_EQ(42, source->core.bin);
+    EXPECT_EQ(42, source->core.qual);
+    EXPECT_EQ(0,  source->core.l_qname);
+    EXPECT_EQ(42, source->core.flag);
+    EXPECT_EQ(0,  source->core.n_cigar);
+    EXPECT_EQ(0,  source->core.l_qseq);
+    EXPECT_EQ(42, source->core.mtid);
+    EXPECT_EQ(42, source->core.mpos);
+    EXPECT_EQ(42, source->core.isize);
+    EXPECT_TRUE(source->data != nullptr);
+    EXPECT_TRUE(0 != source->l_data);
+    EXPECT_TRUE(0 != source->m_data);
+
+    // new record carries the same values
+    EXPECT_EQ(42, record.Bin());
+    EXPECT_EQ(42, record.Flag());
+    EXPECT_EQ(42, record.InsertSize());
+    EXPECT_EQ(42, record.MapQuality());
+    EXPECT_EQ(42, record.MateReferenceId());
+    EXPECT_EQ(42, record.MatePosition());
+    EXPECT_EQ(42, record.Position());
+    EXPECT_EQ(42, record.ReferenceId());
+    EXPECT_EQ(x, record.Tags()["XY"].ToInt32());
+
+    EXPECT_TRUE(record.d_->data != nullptr);
+    EXPECT_TRUE(record.d_->m_data >= static_cast<int>(0x800)); // revisit if the initial capacity is ever changed or tuned
+
+    // modify the source; the new record must be unaffected (deep copy)
+    source->core.pos = 37;
+    EXPECT_EQ(37, source->core.pos);
+    EXPECT_EQ(42, record.Position());
+    EXPECT_EQ(42, record.d_->core.pos);
+}
+
+TEST(BamRecordImplCoreTest, CopyAssignment)
+{
+    // After `target = source`, both records must report the same core fields and
+    // tags, and each must pass the raw-data consistency check.
+
+    // shared expectations for a fully populated record; the label only shows up
+    // in failure output to identify which record was being inspected
+    const auto expectPopulated = [](const char* label, const BamRecordImpl& record) {
+        SCOPED_TRACE(label);
+
+        EXPECT_EQ(42, record.Bin());
+        EXPECT_EQ(42, record.Flag());
+        EXPECT_EQ(42, record.InsertSize());
+        EXPECT_EQ(42, record.MapQuality());
+        EXPECT_EQ(42, record.MateReferenceId());
+        EXPECT_EQ(42, record.MatePosition());
+        EXPECT_EQ(42, record.Position());
+        EXPECT_EQ(42, record.ReferenceId());
+
+        const TagCollection& fetched = record.Tags();
+        EXPECT_TRUE(fetched.at("HX").HasModifier(TagModifier::HEX_STRING));
+        EXPECT_EQ(std::string("1abc75"), fetched.at("HX").ToString());
+        EXPECT_EQ(static_cast<int32_t>(-42), fetched.at("XY").ToInt32());
+        EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), fetched.at("CA").ToUInt8Array());
+    };
+
+    // populate the source record
+    BamRecordImpl source;
+    source.Bin(42);
+    source.Flag(42);
+    source.InsertSize(42);
+    source.MapQuality(42);
+    source.MatePosition(42);
+    source.MateReferenceId(42);
+    source.Position(42);
+    source.ReferenceId(42);
+
+    TagCollection inputTags;
+    inputTags["HX"] = std::string("1abc75");
+    inputTags["HX"].Modifier(TagModifier::HEX_STRING);
+    inputTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    inputTags["XY"] = static_cast<int32_t>(-42);
+    source.Tags(inputTags);
+
+    // operation under test
+    BamRecordImpl target;
+    target = source;
+
+    expectPopulated("source record", source);
+    expectPopulated("assigned record", target);
+
+    tests::CheckRawData(source);
+    tests::CheckRawData(target);
+}
+
+TEST(BamRecordImplCoreTest, SelfAssignmentTolerated)
+{
+    // Assigning a record to itself must leave all core fields, tags, and the raw
+    // data bookkeeping exactly as they were.
+    BamRecordImpl record;
+    record.Bin(42);
+    record.Flag(42);
+    record.InsertSize(42);
+    record.MapQuality(42);
+    record.MatePosition(42);
+    record.MateReferenceId(42);
+    record.Position(42);
+    record.ReferenceId(42);
+
+    TagCollection inputTags;
+    inputTags["HX"] = std::string("1abc75");
+    inputTags["HX"].Modifier(TagModifier::HEX_STRING);
+    inputTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    inputTags["XY"] = static_cast<int32_t>(-42);
+    record.Tags(inputTags);
+
+    // self-assignment, spelled through an alias of the same object
+    BamRecordImpl& sameRecord = record;
+    record = sameRecord;
+
+    // core fields survive
+    EXPECT_EQ(42, record.Bin());
+    EXPECT_EQ(42, record.Flag());
+    EXPECT_EQ(42, record.InsertSize());
+    EXPECT_EQ(42, record.MapQuality());
+    EXPECT_EQ(42, record.MateReferenceId());
+    EXPECT_EQ(42, record.MatePosition());
+    EXPECT_EQ(42, record.Position());
+    EXPECT_EQ(42, record.ReferenceId());
+
+    // tags survive
+    const TagCollection& fetched = record.Tags();
+    EXPECT_TRUE(fetched.at("HX").HasModifier(TagModifier::HEX_STRING));
+    EXPECT_EQ(std::string("1abc75"), fetched.at("HX").ToString());
+    EXPECT_EQ(static_cast<int32_t>(-42), fetched.at("XY").ToInt32());
+    EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), fetched.at("CA").ToUInt8Array());
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplCoreTest, CopyConstructor)
+{
+    // Copy-constructing from a populated record must yield a record with the same
+    // core fields and tags while leaving the source unchanged.
+
+    // shared expectations for a fully populated record; the label only shows up
+    // in failure output to identify which record was being inspected
+    const auto expectPopulated = [](const char* label, const BamRecordImpl& record) {
+        SCOPED_TRACE(label);
+
+        EXPECT_EQ(42, record.Bin());
+        EXPECT_EQ(42, record.Flag());
+        EXPECT_EQ(42, record.InsertSize());
+        EXPECT_EQ(42, record.MapQuality());
+        EXPECT_EQ(42, record.MateReferenceId());
+        EXPECT_EQ(42, record.MatePosition());
+        EXPECT_EQ(42, record.Position());
+        EXPECT_EQ(42, record.ReferenceId());
+
+        const TagCollection& fetched = record.Tags();
+        EXPECT_TRUE(fetched.at("HX").HasModifier(TagModifier::HEX_STRING));
+        EXPECT_EQ(std::string("1abc75"), fetched.at("HX").ToString());
+        EXPECT_EQ(static_cast<int32_t>(-42), fetched.at("XY").ToInt32());
+        EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), fetched.at("CA").ToUInt8Array());
+    };
+
+    // populate the source record
+    BamRecordImpl source;
+    source.Bin(42);
+    source.Flag(42);
+    source.InsertSize(42);
+    source.MapQuality(42);
+    source.MatePosition(42);
+    source.MateReferenceId(42);
+    source.Position(42);
+    source.ReferenceId(42);
+
+    TagCollection inputTags;
+    inputTags["HX"] = std::string("1abc75");
+    inputTags["HX"].Modifier(TagModifier::HEX_STRING);
+    inputTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    inputTags["XY"] = static_cast<int32_t>(-42);
+    source.Tags(inputTags);
+
+    // operation under test
+    BamRecordImpl duplicate(source);
+
+    expectPopulated("source record", source);
+    expectPopulated("copy-constructed record", duplicate);
+
+    tests::CheckRawData(source);
+    tests::CheckRawData(duplicate);
+}
+
+TEST(BamRecordImplCoreTest, CreateRecord_InternalTest)
+{
+    // Sanity check of the tests::CreateBamImpl() fixture helper: the record it
+    // returns carries 42 in every core field, and it still passes the raw-data
+    // check after its tags are (re)assigned.
+    BamRecordImpl record = tests::CreateBamImpl();
+
+    EXPECT_EQ(42, record.Bin());
+    EXPECT_EQ(42, record.Flag());
+    EXPECT_EQ(42, record.InsertSize());
+    EXPECT_EQ(42, record.MapQuality());
+    EXPECT_EQ(42, record.MateReferenceId());
+    EXPECT_EQ(42, record.MatePosition());
+    EXPECT_EQ(42, record.Position());
+    EXPECT_EQ(42, record.ReferenceId());
+
+    TagCollection inputTags;
+    inputTags["HX"] = std::string("1abc75");
+    inputTags["HX"].Modifier(TagModifier::HEX_STRING);
+    inputTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    inputTags["XY"] = static_cast<int32_t>(-42);
+    record.Tags(inputTags);
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplCoreTest, MoveAssignment)
+{
+    // Move-assigning from a populated record must hand its core fields and tags
+    // over to the target.
+    //
+    // The source is a named object, so std::move() is genuinely required to select
+    // the move-assignment operator. This avoids the "moving a temporary" diagnostic
+    // without any compiler-specific pragma suppression.
+    BamRecordImpl source = tests::CreateBamImpl();
+
+    BamRecordImpl bam;
+    bam = std::move(source);
+
+    EXPECT_EQ(42, bam.Bin());
+    EXPECT_EQ(42, bam.Flag());
+    EXPECT_EQ(42, bam.InsertSize());
+    EXPECT_EQ(42, bam.MapQuality());
+    EXPECT_EQ(42, bam.MateReferenceId());
+    EXPECT_EQ(42, bam.MatePosition());
+    EXPECT_EQ(42, bam.Position());
+    EXPECT_EQ(42, bam.ReferenceId());
+
+    const TagCollection& fetchedTags1 = bam.Tags();
+    EXPECT_TRUE(fetchedTags1.at("HX").HasModifier(TagModifier::HEX_STRING));
+    EXPECT_EQ(std::string("1abc75"), fetchedTags1.at("HX").ToString());
+    EXPECT_EQ(static_cast<int32_t>(-42), fetchedTags1.at("XY").ToInt32());
+    EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), fetchedTags1.at("CA").ToUInt8Array());
+
+    tests::CheckRawData(bam);
+}
+
+TEST(BamRecordImplCoreTest, MoveConstructor)
+{
+    // Move-constructing from a populated record must hand its core fields and tags
+    // over to the new record.
+    //
+    // The source is a named object, so std::move() is genuinely required to select
+    // the move constructor (constructing straight from the factory's return value
+    // could be elided and never call it). This also removes the need for any
+    // compiler-specific pragma suppression.
+    BamRecordImpl source = tests::CreateBamImpl();
+
+    BamRecordImpl bam(std::move(source));
+
+    EXPECT_EQ(42, bam.Bin());
+    EXPECT_EQ(42, bam.Flag());
+    EXPECT_EQ(42, bam.InsertSize());
+    EXPECT_EQ(42, bam.MapQuality());
+    EXPECT_EQ(42, bam.MateReferenceId());
+    EXPECT_EQ(42, bam.MatePosition());
+    EXPECT_EQ(42, bam.Position());
+    EXPECT_EQ(42, bam.ReferenceId());
+
+    const TagCollection& fetchedTags1 = bam.Tags();
+    EXPECT_TRUE(fetchedTags1.at("HX").HasModifier(TagModifier::HEX_STRING));
+    EXPECT_EQ(std::string("1abc75"), fetchedTags1.at("HX").ToString());
+    EXPECT_EQ(static_cast<int32_t>(-42), fetchedTags1.at("XY").ToInt32());
+    EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), fetchedTags1.at("CA").ToUInt8Array());
+
+    tests::CheckRawData(bam);
+}
+
+TEST(BamRecordImplCoreTest, AlignmentFlags)
+{
+    // The same flag word is produced three ways (raw number, OR-ed enum values,
+    // per-flag setters); all three must agree, and the Is*() queries must decode
+    // the raw-number record as expected.
+
+    // route 1: raw number
+    BamRecordImpl fromNumber;
+    fromNumber.Flag(1107);
+
+    // route 2: enum values
+    BamRecordImpl fromEnums;
+    fromEnums.Flag(BamRecordImpl::DUPLICATE      |
+                   BamRecordImpl::MATE_1         |
+                   BamRecordImpl::REVERSE_STRAND |
+                   BamRecordImpl::PROPER_PAIR    |
+                   BamRecordImpl::PAIRED);
+
+    // route 3: convenience setters
+    BamRecordImpl fromSetters;
+    fromSetters.SetDuplicate(true);
+    fromSetters.SetFirstMate(true);
+    fromSetters.SetReverseStrand(true);
+    fromSetters.SetMapped(true);
+    fromSetters.SetMateMapped(true);
+    fromSetters.SetPaired(true);
+    fromSetters.SetProperPair(true);
+    fromSetters.SetPrimaryAlignment(true);
+
+    // all routes end at the same flag word
+    EXPECT_EQ(1107, fromNumber.Flag());
+    EXPECT_EQ(1107, fromEnums.Flag());
+    EXPECT_EQ(1107, fromSetters.Flag());
+
+    // decoded view of that flag word
+    EXPECT_TRUE (fromNumber.IsPaired());
+    EXPECT_TRUE (fromNumber.IsProperPair());
+    EXPECT_TRUE (fromNumber.IsMapped());
+    EXPECT_TRUE (fromNumber.IsMateMapped());
+    EXPECT_TRUE (fromNumber.IsReverseStrand());
+    EXPECT_FALSE(fromNumber.IsMateReverseStrand());
+    EXPECT_TRUE (fromNumber.IsFirstMate());
+    EXPECT_FALSE(fromNumber.IsSecondMate());
+    EXPECT_TRUE (fromNumber.IsPrimaryAlignment());
+    EXPECT_FALSE(fromNumber.IsFailedQC());
+    EXPECT_TRUE (fromNumber.IsDuplicate());
+    EXPECT_FALSE(fromNumber.IsSupplementaryAlignment());
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/BamRecordImpl.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// NOTE: these tests check "high-level" tag query/manipulation via BamRecordImpl.
+// For raw Tag/TagCollection tests, see test_Tags.cpp
+// For encoding tests, see test_BamRecordImplVariableData.cpp
+
+TEST(BamRecordImplTagsTest, HasTagTest)
+{
+    // HasTag() on the record and Contains() on the fetched collection must agree:
+    // true for each stored tag, false for an unknown, empty, or over-long name.
+    TagCollection inputTags;
+    inputTags["HX"] = std::string("1abc75");
+    inputTags["HX"].Modifier(TagModifier::HEX_STRING);
+    inputTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    inputTags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Tags(inputTags);
+
+    // record-level queries
+    for (const char* present : {"HX", "CA", "XY"})
+        EXPECT_TRUE(record.HasTag(present)) << present;
+    for (const char* absent : {"zz", "", "some_too_long_name"})
+        EXPECT_FALSE(record.HasTag(absent)) << absent;
+
+    // collection-level queries
+    const TagCollection& fetched = record.Tags();
+    for (const char* present : {"HX", "CA", "XY"})
+        EXPECT_TRUE(fetched.Contains(present)) << present;
+    for (const char* absent : {"zz", "", "some_too_long_name"})
+        EXPECT_FALSE(fetched.Contains(absent)) << absent;
+}
+
+TEST(BamRecordImplTagsTest, SimpleAddTag)
+{
+    // AddTag() must make a new tag visible alongside the existing ones, and must
+    // return false for an empty name, an over-long name, or a duplicate.
+    TagCollection inputTags;
+    inputTags["HX"] = std::string("1abc75");
+    inputTags["HX"].Modifier(TagModifier::HEX_STRING);
+    inputTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+
+    BamRecordImpl record;
+    record.Tags(inputTags);
+
+    // before: XY is not there yet
+    EXPECT_TRUE (record.HasTag("HX"));
+    EXPECT_TRUE (record.HasTag("CA"));
+    EXPECT_FALSE(record.HasTag("XY"));
+
+    record.AddTag("XY", static_cast<int32_t>(-42));
+
+    // after: all three are present
+    EXPECT_TRUE(record.HasTag("HX"));
+    EXPECT_TRUE(record.HasTag("CA"));
+    EXPECT_TRUE(record.HasTag("XY"));
+
+    const TagCollection& fetched = record.Tags();
+    EXPECT_TRUE (fetched.Contains("HX"));
+    EXPECT_TRUE (fetched.Contains("CA"));
+    EXPECT_TRUE (fetched.Contains("XY"));
+    EXPECT_FALSE(fetched.Contains("zz"));
+    EXPECT_FALSE(fetched.Contains(""));
+    EXPECT_FALSE(fetched.Contains("some_too_long_name"));
+
+    EXPECT_EQ(-42, fetched.at("XY").ToInt32());
+
+    // fail on invalid adds
+    EXPECT_FALSE(record.AddTag("", static_cast<int32_t>(-42)));
+    EXPECT_FALSE(record.AddTag("some_too_long_name", static_cast<int32_t>(-42)));
+    EXPECT_FALSE(record.AddTag("XY", static_cast<int32_t>(-42))); // reject duplicate
+}
+
+TEST(BamRecordImplTagsTest, SimpleRemoveTag)
+{
+    // RemoveTag() must drop only the named tag, and must return false for an
+    // empty, over-long, or unknown name.
+    TagCollection inputTags;
+    inputTags["HX"] = std::string("1abc75");
+    inputTags["HX"].Modifier(TagModifier::HEX_STRING);
+    inputTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    inputTags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Tags(inputTags);
+
+    // before: all three are present
+    EXPECT_TRUE(record.HasTag("HX"));
+    EXPECT_TRUE(record.HasTag("CA"));
+    EXPECT_TRUE(record.HasTag("XY"));
+
+    EXPECT_TRUE(record.RemoveTag("XY"));
+
+    // after: only XY is gone
+    EXPECT_TRUE (record.HasTag("HX"));
+    EXPECT_TRUE (record.HasTag("CA"));
+    EXPECT_FALSE(record.HasTag("XY"));
+
+    const TagCollection& fetched = record.Tags();
+    EXPECT_TRUE (fetched.Contains("HX"));
+    EXPECT_TRUE (fetched.Contains("CA"));
+    EXPECT_FALSE(fetched.Contains("XY"));
+    EXPECT_FALSE(fetched.Contains("zz"));
+    EXPECT_FALSE(fetched.Contains(""));
+    EXPECT_FALSE(fetched.Contains("some_too_long_name"));
+
+    // fail on invalid removes
+    EXPECT_FALSE(record.RemoveTag(""));
+    EXPECT_FALSE(record.RemoveTag("some_too_long_name"));
+    EXPECT_FALSE(record.RemoveTag("zz")); // reject remove unknown
+}
+
+TEST(BamRecordImplTagsTest, SimpleEditTag)
+{
+    // EditTag() must replace the value of an existing tag and leave the others in
+    // place; it must return false for an empty, over-long, or unknown name.
+    TagCollection inputTags;
+    inputTags["HX"] = std::string("1abc75");
+    inputTags["HX"].Modifier(TagModifier::HEX_STRING);
+    inputTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    inputTags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Tags(inputTags);
+
+    EXPECT_TRUE(record.HasTag("XY"));
+
+    // state before the edit
+    const TagCollection& beforeEdit = record.Tags();
+    EXPECT_TRUE(beforeEdit.Contains("HX"));
+    EXPECT_TRUE(beforeEdit.Contains("CA"));
+    EXPECT_TRUE(beforeEdit.Contains("XY"));
+    EXPECT_EQ(-42, beforeEdit.at("XY").ToInt32());
+
+    EXPECT_TRUE(record.EditTag("XY", static_cast<int32_t>(500)));
+    EXPECT_TRUE(record.HasTag("XY"));
+
+    // state after the edit
+    const TagCollection& afterEdit = record.Tags();
+    EXPECT_TRUE(afterEdit.Contains("HX"));
+    EXPECT_TRUE(afterEdit.Contains("CA"));
+    EXPECT_TRUE(afterEdit.Contains("XY"));
+    EXPECT_EQ(500, afterEdit.at("XY").ToInt32());
+
+    // fail on invalid edits
+    EXPECT_FALSE(record.EditTag("", 500));
+    EXPECT_FALSE(record.EditTag("some_too_long_name", 500));
+    EXPECT_FALSE(record.EditTag("zz", 500)); // reject edit unknown
+}
+
+TEST(BamRecordImplTagsTest, SimpleQueryTag)
+{
+    // TagValue() must return the stored value for each known tag and a
+    // default-constructed Tag for an unknown, empty, or over-long name.
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Tags(tags);
+
+    // each of the three stored tags is reported (previously "XY" was checked
+    // twice and "HX" not at all)
+    EXPECT_TRUE(bam.HasTag("HX"));
+    EXPECT_TRUE(bam.HasTag("CA"));
+    EXPECT_TRUE(bam.HasTag("XY"));
+
+    EXPECT_EQ(std::string("1abc75"), bam.TagValue("HX").ToString());
+    EXPECT_EQ(std::vector<uint8_t>({34, 5, 125}), bam.TagValue("CA").ToUInt8Array());
+    EXPECT_EQ(static_cast<int32_t>(-42), bam.TagValue("XY").ToInt32());
+
+    EXPECT_FALSE(bam.HasTag("zz"));
+    EXPECT_FALSE(bam.HasTag(""));
+    EXPECT_FALSE(bam.HasTag("some_too_long_name"));
+
+    // lookups that cannot succeed compare equal to an empty Tag
+    EXPECT_EQ(Tag(), bam.TagValue("zz"));
+    EXPECT_EQ(Tag(), bam.TagValue(""));
+    EXPECT_EQ(Tag(), bam.TagValue("some_too_long_name"));
+}
+
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/BamRecordImpl.h>
+#include <pbbam/BamTagCodec.h>
+#include <pbbam/SamTagCodec.h>
+#include <pbbam/Tag.h>
+#include <pbbam/TagCollection.h>
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
+using namespace PacBio;
+using namespace PacBio::BAM;
+
+// NOTE: this file has a *TON* of tests. Probably overkill, but I wanted to check
+// every possible combination of variable data, and then manipulate each
+// element within each combo to shrink & expand.
+
+namespace tests {
+
+// Cross-checks the length bookkeeping of the record's raw data against what the
+// public API reports (lengths only; contents are not compared).
+static
+void CheckRawData(const BamRecordImpl& bam)
+{
+    // sizes as seen through the API
+    const uint32_t nameBytes   = bam.Name().size() + 1;
+    const uint32_t numCigarOps = bam.CigarData().size();
+    const int32_t  seqLength   = bam.Sequence().length();
+    const size_t   tagBytes    = BamTagCodec::Encode(bam.Tags()).size();
+
+    // Layout of the variable-length data:
+    //
+    //   Name      CIGAR            Sequence       Quals     Tags
+    //   l_qname + (n_cigar * 4) + (l_qseq+1)/2 + l_qseq + <encoded length>
+
+    const uint32_t cigarBytes     = numCigarOps * 4;
+    const int32_t  packedSeqBytes = (seqLength + 1) / 2;
+    const int totalBytes = nameBytes + cigarBytes + packedSeqBytes + seqLength + tagBytes;
+
+    // the raw struct must agree on every count
+    EXPECT_EQ(nameBytes,   bam.d_->core.l_qname);
+    EXPECT_EQ(numCigarOps, bam.d_->core.n_cigar);
+    EXPECT_EQ(seqLength,   bam.d_->core.l_qseq);
+    EXPECT_EQ(totalBytes,  bam.d_->l_data);
+}
+
+} // namespace tests
+
+TEST(BamRecordImplVariableDataTest, InitEmpty)
+{
+    // a default-constructed record must already have consistent raw-data lengths
+    BamRecordImpl freshRecord;
+    tests::CheckRawData(freshRecord);
+}
+
+TEST(BamRecordImplVariableDataTest, TagOnly_InitEmpty)
+{
+    // setting an empty tag collection on a fresh record leaves it with no tags
+    BamRecordImpl record;
+    record.Tags(TagCollection());
+
+    EXPECT_EQ(0, record.Tags().size());
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, TagOnly_InitNormal)
+{
+    // tags stored on a record must round-trip to the expected SAM text
+    TagCollection inputTags;
+    inputTags["HX"] = std::string("1abc75");
+    inputTags["HX"].Modifier(TagModifier::HEX_STRING);
+    inputTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    inputTags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Tags(inputTags);
+
+    // tab-separated SAM fields
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75"     "\t"
+        "XY:i:-42";
+
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+}
+
+TEST(BamRecordImplVariableDataTest, TagOnly_ThenOverwriteWithLongerTags)
+{
+    // Replacing a record's tags with a larger collection must store the larger
+    // collection and keep the raw-data lengths consistent.
+
+    // initial tags: two entries only, so that the overwrite below really grows the
+    // tag data (previously this collection was identical to 'longerTags')
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+
+    // replacement tags: the same two entries plus 'XY'
+    TagCollection longerTags;
+    longerTags["HX"] = std::string("1abc75");
+    longerTags["HX"].Modifier(TagModifier::HEX_STRING);
+    longerTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    longerTags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Tags(tags);
+    bam.Tags(longerTags);
+
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // same raw-length check the sibling overwrite tests perform
+    tests::CheckRawData(bam);
+}
+
+TEST(BamRecordImplVariableDataTest, TagOnly_ThenOverwriteWithShorterTags)
+{
+    // replacing a record's tags with a smaller collection must leave only the
+    // smaller collection behind
+    TagCollection shorterTags;
+    shorterTags["HX"] = std::string("1abc75");
+    shorterTags["HX"].Modifier(TagModifier::HEX_STRING);
+    shorterTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+
+    TagCollection initialTags;
+    initialTags["HX"] = std::string("1abc75");
+    initialTags["HX"].Modifier(TagModifier::HEX_STRING);
+    initialTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    initialTags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Tags(initialTags);
+    record.Tags(shorterTags);
+
+    // tab-separated SAM fields
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75";
+
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+}
+
+TEST(BamRecordImplVariableDataTest, TagOnly_ThenOverwriteWithEmptyTags)
+{
+    // replacing a record's tags with an empty collection must remove them all
+    TagCollection initialTags;
+    initialTags["HX"] = std::string("1abc75");
+    initialTags["HX"].Modifier(TagModifier::HEX_STRING);
+    initialTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    initialTags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Tags(initialTags);
+    record.Tags(TagCollection());
+
+    EXPECT_EQ(0, record.Tags().size());
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, CigarOnly_InitEmpty)
+{
+    // setting an empty CIGAR string on a fresh record leaves it with no CIGAR ops
+    const std::string noCigar;
+
+    BamRecordImpl record;
+    record.CigarData(noCigar);
+
+    EXPECT_EQ(0, record.CigarData().size());
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, CigarOnly_InitNormal_CigarObject)
+{
+    // a CIGAR supplied as a Cigar object must read back equal, both as an
+    // object and as text
+    Cigar inputCigar;
+    inputCigar.push_back(CigarOperation('=', 100));
+
+    BamRecordImpl record;
+    record.CigarData(inputCigar);
+
+    EXPECT_EQ(inputCigar, record.CigarData());
+    EXPECT_TRUE(record.CigarData().ToStdString() == "100=");
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, CigarOnly_InitNormal_StdString)
+{
+    // a CIGAR supplied as text must read back as the same text
+    const std::string inputCigar("100=");
+
+    BamRecordImpl record;
+    record.CigarData(inputCigar);
+
+    EXPECT_EQ(inputCigar, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, CigarOnly_ThenOverwriteWithLongerCigar)
+{
+    // overwriting a one-op CIGAR with a multi-op CIGAR must store the longer one
+    const std::string initialCigar("100=");
+    const std::string replacement("100=10D100=10I100X");
+
+    BamRecordImpl record;
+    record.CigarData(initialCigar);
+    record.CigarData(replacement);
+
+    EXPECT_EQ(replacement, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, CigarOnly_ThenOverwriteWithShorterCigar)
+{
+    // overwriting a multi-op CIGAR with a one-op CIGAR must store the shorter one
+    const std::string initialCigar("100=10D100=10I100X");
+    const std::string replacement("100=");
+
+    BamRecordImpl record;
+    record.CigarData(initialCigar);
+    record.CigarData(replacement);
+
+    EXPECT_EQ(replacement, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, CigarOnly_ThenOverwriteWithEmptyCigar)
+{
+    // overwriting a CIGAR with an empty string must clear it
+    const std::string initialCigar("100=");
+    const std::string noCigar;
+
+    BamRecordImpl record;
+    record.CigarData(initialCigar);
+    record.CigarData(noCigar);
+
+    EXPECT_EQ(noCigar, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, CigarTag_Init_Normal)
+{
+    // a record given a CIGAR and then tags must report both unchanged
+    const std::string inputCigar("100=");
+
+    TagCollection inputTags;
+    inputTags["HX"] = std::string("1abc75");
+    inputTags["HX"].Modifier(TagModifier::HEX_STRING);
+    inputTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    inputTags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.CigarData(inputCigar);
+    record.Tags(inputTags);
+
+    EXPECT_EQ(inputCigar, record.CigarData().ToStdString());
+
+    // tab-separated SAM fields
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75"     "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, CigarTag_Init_EmptyCigar)
+{
+    // A record initialized with an *empty* CIGAR and then given tags must report
+    // an empty CIGAR and the tags unchanged.
+    //
+    // Previously this test set a non-empty CIGAR first and then cleared it, which
+    // made it an exact duplicate of CigarTag_ThenOverwriteWithEmptyCigar and left
+    // the "init with empty CIGAR" case untested.
+    const std::string empty = "";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.CigarData(empty);
+    bam.Tags(tags);
+
+    EXPECT_EQ(empty, bam.CigarData().ToStdString());
+
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    tests::CheckRawData(bam);
+}
+
+TEST(BamRecordImplVariableDataTest, CigarTag_Init_EmptyTag)
+{
+    // a record given a CIGAR and then an empty tag collection keeps the CIGAR
+    // and has no tags
+    const std::string inputCigar("100=");
+
+    BamRecordImpl record;
+    record.CigarData(inputCigar);
+    record.Tags(TagCollection());
+
+    EXPECT_EQ(inputCigar, record.CigarData().ToStdString());
+    EXPECT_EQ(0, record.Tags().size());
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, CigarTag_ThenOverwriteWithLongerCigar)
+{
+    // growing the CIGAR after tags were stored must not disturb the tags
+    const std::string initialCigar("100=");
+    const std::string replacement("100=10D100=10I100X");
+
+    TagCollection inputTags;
+    inputTags["HX"] = std::string("1abc75");
+    inputTags["HX"].Modifier(TagModifier::HEX_STRING);
+    inputTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    inputTags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.CigarData(initialCigar);
+    record.Tags(inputTags);
+    record.CigarData(replacement);
+
+    EXPECT_EQ(replacement, record.CigarData().ToStdString());
+
+    // tab-separated SAM fields
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75"     "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, CigarTag_ThenOverwriteWithShorterCigar)
+{
+    // shrinking the CIGAR after tags were stored must not disturb the tags
+    const std::string initialCigar("100=10D100=10I100X");
+    const std::string replacement("100=");
+
+    TagCollection inputTags;
+    inputTags["HX"] = std::string("1abc75");
+    inputTags["HX"].Modifier(TagModifier::HEX_STRING);
+    inputTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    inputTags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.CigarData(initialCigar);
+    record.Tags(inputTags);
+    record.CigarData(replacement);
+
+    EXPECT_EQ(replacement, record.CigarData().ToStdString());
+
+    // tab-separated SAM fields
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75"     "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, CigarTag_ThenOverwriteWithEmptyCigar)
+{
+    // clearing the CIGAR after tags were stored must not disturb the tags
+    const std::string initialCigar("100=");
+    const std::string noCigar;
+
+    TagCollection inputTags;
+    inputTags["HX"] = std::string("1abc75");
+    inputTags["HX"].Modifier(TagModifier::HEX_STRING);
+    inputTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    inputTags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.CigarData(initialCigar);
+    record.Tags(inputTags);
+    record.CigarData(noCigar);
+
+    EXPECT_EQ(noCigar, record.CigarData().ToStdString());
+
+    // tab-separated SAM fields
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75"     "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, CigarTag_ThenOverwriteWithLongerTags)
+{
+    // growing the tags on a record that has a CIGAR must not disturb the CIGAR
+    const std::string inputCigar("100=");
+
+    TagCollection initialTags;
+    initialTags["HX"] = std::string("1abc75");
+    initialTags["HX"].Modifier(TagModifier::HEX_STRING);
+    initialTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+
+    // same two entries plus XY
+    TagCollection replacementTags;
+    replacementTags["HX"] = std::string("1abc75");
+    replacementTags["HX"].Modifier(TagModifier::HEX_STRING);
+    replacementTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    replacementTags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.CigarData(inputCigar);
+    record.Tags(initialTags);
+    record.Tags(replacementTags);
+
+    EXPECT_EQ(inputCigar, record.CigarData().ToStdString());
+
+    // tab-separated SAM fields
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75"     "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, CigarTag_ThenOverwriteWithShorterTags)
+{
+    // shrinking the tags on a record that has a CIGAR must not disturb the CIGAR
+    const std::string inputCigar("100=");
+
+    TagCollection replacementTags;
+    replacementTags["HX"] = std::string("1abc75");
+    replacementTags["HX"].Modifier(TagModifier::HEX_STRING);
+    replacementTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+
+    // same two entries plus XY
+    TagCollection initialTags;
+    initialTags["HX"] = std::string("1abc75");
+    initialTags["HX"].Modifier(TagModifier::HEX_STRING);
+    initialTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    initialTags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.CigarData(inputCigar);
+    record.Tags(initialTags);
+    record.Tags(replacementTags);
+
+    EXPECT_EQ(inputCigar, record.CigarData().ToStdString());
+
+    // tab-separated SAM fields
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, CigarTag_ThenOverwriteWithEmptyTags)
+{
+    // clearing the tags on a record that has a CIGAR must not disturb the CIGAR
+    const std::string inputCigar("100=");
+
+    TagCollection initialTags;
+    initialTags["HX"] = std::string("1abc75");
+    initialTags["HX"].Modifier(TagModifier::HEX_STRING);
+    initialTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+
+    BamRecordImpl record;
+    record.CigarData(inputCigar);
+    record.Tags(initialTags);
+    record.Tags(TagCollection());
+
+    EXPECT_EQ(inputCigar, record.CigarData().ToStdString());
+    EXPECT_EQ(0, record.Tags().size());
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, SeqQualOnly_Init_Empty)
+{
+    // empty sequence + empty qualities leave the record with neither
+    const std::string noSequence;
+    const std::string noQualities;
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(noSequence, noQualities);
+
+    EXPECT_EQ(0, record.Sequence().size());
+    EXPECT_EQ(0, record.Qualities().Fastq().size());
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, SeqQualOnly_Init_NormalQual)
+{
+    // sequence and FASTQ-style qualities must both read back unchanged
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Scenario: a 12-base sequence is stored with an empty quality string; the
+// sequence must read back unchanged and the qualities must read back empty.
+TEST(BamRecordImplVariableDataTest, SeqQualOnly_Init_EmptyQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Packs the bases into 4-bit codes by hand (A=1, C=2, G=4, T=8; two bases per
+// byte, the earlier base in the high nibble), hands the packed buffer to
+// SetPreencodedSequenceAndQualities() and expects the record to report the
+// original sequence and quality strings.
+TEST(BamRecordImplVariableDataTest, SeqQualOnly_Init_Preencoded) {
+
+    const std::string sequence  = "ACGTACGTACGT";
+    const std::string qualities = "?]?]?]?]?]?]";
+
+    // Two bases share one byte, so round the byte count up. The vector is
+    // zero-filled and releases its storage automatically, replacing the
+    // calloc()/free() pair whose result was dereferenced before being checked.
+    std::vector<char> encoded((sequence.size() + 1) / 2, '\0');
+
+    for (size_t i = 0; i < sequence.size(); ++i) {
+
+        // Start from a defined value: an unrecognized base reports a failure
+        // below but must not leave an indeterminate code to be packed.
+        uint8_t nucleotideCode = 0;
+        switch (sequence.at(i)) {
+            case 'A' : nucleotideCode = 1; break;
+            case 'C' : nucleotideCode = 2; break;
+            case 'G' : nucleotideCode = 4; break;
+            case 'T' : nucleotideCode = 8; break;
+            default:
+                EXPECT_FALSE(true);
+                break;
+        }
+
+        // pack the nucleotide code: even positions fill the high nibble,
+        // odd positions complete the same byte with the low nibble
+        char& packed = encoded[i / 2];
+        if (i % 2 == 0)
+            packed = static_cast<char>(nucleotideCode << 4);
+        else
+            packed = static_cast<char>(packed | nucleotideCode);
+    }
+
+    BamRecordImpl bam;
+    bam.SetPreencodedSequenceAndQualities(encoded.data(), sequence.size(), qualities.c_str());
+
+    EXPECT_EQ(sequence,  bam.Sequence());
+    EXPECT_EQ(qualities, bam.Qualities().Fastq());
+    tests::CheckRawData(bam);
+}
+
+// Same as the pre-encoded test with qualities, but the quality string is
+// empty: the bases are packed by hand into 4-bit codes (A=1, C=2, G=4, T=8;
+// two bases per byte, the earlier base in the high nibble) and passed to
+// SetPreencodedSequenceAndQualities(); the record must report the original
+// sequence and an empty quality string.
+TEST(BamRecordImplVariableDataTest, SeqQualOnly_Init_Preencoded_EmptyQual) {
+
+    const std::string sequence  = "ACGTACGTACGT";
+    const std::string qualities = "";
+
+    // Two bases share one byte, so round the byte count up. The vector is
+    // zero-filled and releases its storage automatically, replacing the
+    // calloc()/free() pair whose result was dereferenced before being checked.
+    std::vector<char> encoded((sequence.size() + 1) / 2, '\0');
+
+    for (size_t i = 0; i < sequence.size(); ++i) {
+
+        // Start from a defined value: an unrecognized base reports a failure
+        // below but must not leave an indeterminate code to be packed.
+        uint8_t nucleotideCode = 0;
+        switch (sequence.at(i)) {
+            case 'A' : nucleotideCode = 1; break;
+            case 'C' : nucleotideCode = 2; break;
+            case 'G' : nucleotideCode = 4; break;
+            case 'T' : nucleotideCode = 8; break;
+            default:
+                EXPECT_FALSE(true);
+                break;
+        }
+
+        // pack the nucleotide code: even positions fill the high nibble,
+        // odd positions complete the same byte with the low nibble
+        char& packed = encoded[i / 2];
+        if (i % 2 == 0)
+            packed = static_cast<char>(nucleotideCode << 4);
+        else
+            packed = static_cast<char>(packed | nucleotideCode);
+    }
+
+    BamRecordImpl bam;
+    bam.SetPreencodedSequenceAndQualities(encoded.data(), sequence.size(), qualities.c_str());
+
+    EXPECT_EQ(sequence,  bam.Sequence());
+    EXPECT_EQ(qualities, bam.Qualities().Fastq());
+    tests::CheckRawData(bam);
+}
+
+// Scenario: a 4-base sequence/quality pair is replaced by a 12-base pair; the
+// longer pair must be what the record reports.
+TEST(BamRecordImplVariableDataTest, SeqQualOnly_ThenOverwriteWithLongerSeq_NormalQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("?]?]");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(briefBases, briefQuals);   // initial, short
+    record.SetSequenceAndQualities(bases, quals);             // overwrite, long
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Scenario: a 4-base sequence with qualities is replaced by a 12-base sequence
+// that has an empty quality string.
+TEST(BamRecordImplVariableDataTest, SeqQualOnly_ThenOverwriteWithLongerSeq_EmptyQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("?]?]");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(briefBases, briefQuals);   // initial, short
+    record.SetSequenceAndQualities(bases, quals);             // overwrite, long, no quals
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Scenario: a 12-base sequence/quality pair is replaced by a 4-base pair; the
+// shorter pair must be what the record reports.
+TEST(BamRecordImplVariableDataTest, SeqQualOnly_ThenOverwriteWithShorterSeq_NormalQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("?]?]");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);             // initial, long
+    record.SetSequenceAndQualities(briefBases, briefQuals);   // overwrite, short
+
+    EXPECT_EQ(briefBases, record.Sequence());
+    EXPECT_EQ(briefQuals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Scenario: a 12-base sequence with qualities is replaced by a 4-base sequence
+// that has an empty quality string.
+TEST(BamRecordImplVariableDataTest, SeqQualOnly_ThenOverwriteWithShorterSeq_EmptyQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);             // initial, long
+    record.SetSequenceAndQualities(briefBases, briefQuals);   // overwrite, short, no quals
+
+    EXPECT_EQ(briefBases, record.Sequence());
+    EXPECT_EQ(briefQuals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Scenario: a 12-base sequence/quality pair is replaced by an empty sequence
+// and empty qualities; both must read back empty.
+TEST(BamRecordImplVariableDataTest, SeqQualOnly_ThenOverwriteWithEmptySeq)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string nothing("");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.SetSequenceAndQualities(nothing, nothing);
+
+    EXPECT_EQ(nothing, record.Sequence());
+    EXPECT_EQ(nothing, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities are stored, then three tags. Sequence,
+// qualities and the SAM-encoded tags must all read back as supplied.
+TEST(BamRecordImplVariableDataTest, SeqQualTag_Init_Normal)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.Tags(tagSet);
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: empty sequence and qualities are stored, then three tags. The
+// empty strings and the SAM-encoded tags must read back as supplied.
+TEST(BamRecordImplVariableDataTest, SeqQualTag_Init_EmptySeqQual)
+{
+    const std::string bases("");
+    const std::string quals("");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.Tags(tagSet);
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: a 12-base sequence with empty qualities is stored, then three
+// tags. Sequence, empty qualities and SAM-encoded tags must read back as
+// supplied.
+TEST(BamRecordImplVariableDataTest, SeqQualTag_Init_EmptyQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.Tags(tagSet);
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities are stored, followed by an empty tag
+// collection. Sequence and qualities must be intact and no tags present.
+TEST(BamRecordImplVariableDataTest, SeqQualTag_Init_EmptyTag)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.Tags(TagCollection());
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(0, record.Tags().size());
+    tests::CheckRawData(record);
+}
+
+// Scenario: a short sequence/quality pair and three tags are stored, then the
+// sequence/qualities are replaced by a longer pair. The longer pair must be
+// reported and the tags must be unaffected.
+TEST(BamRecordImplVariableDataTest, SeqQualTag_ThenOverwriteWithLongerSeq_NormalQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("?]?]");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(briefBases, briefQuals);
+    record.Tags(tagSet);
+    record.SetSequenceAndQualities(bases, quals);   // overwrite with longer data
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: a short sequence/quality pair and three tags are stored, then the
+// sequence is replaced by a longer one with empty qualities. The new
+// sequence/qualities must be reported and the tags must be unaffected.
+TEST(BamRecordImplVariableDataTest, SeqQualTag_ThenOverwriteWithLongerSeq_EmptyQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("?]?]");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(briefBases, briefQuals);
+    record.Tags(tagSet);
+    record.SetSequenceAndQualities(bases, quals);   // overwrite, longer, no quals
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: a long sequence/quality pair and three tags are stored, then the
+// sequence/qualities are replaced by a shorter pair. The shorter pair must be
+// reported and the tags must be unaffected.
+TEST(BamRecordImplVariableDataTest, SeqQualTag_ThenOverwriteWithShorterSeq_NormalQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("?]?]");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.Tags(tagSet);
+    record.SetSequenceAndQualities(briefBases, briefQuals);   // overwrite with shorter data
+
+    EXPECT_EQ(briefBases, record.Sequence());
+    EXPECT_EQ(briefQuals, record.Qualities().Fastq());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: a long sequence/quality pair and three tags are stored, then the
+// sequence is replaced by a shorter one with empty qualities. The new
+// sequence/qualities must be reported and the tags must be unaffected.
+TEST(BamRecordImplVariableDataTest, SeqQualTag_ThenOverwriteWithShorterSeq_EmptyQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.Tags(tagSet);
+    record.SetSequenceAndQualities(briefBases, briefQuals);   // overwrite, shorter, no quals
+
+    EXPECT_EQ(briefBases, record.Sequence());
+    EXPECT_EQ(briefQuals, record.Qualities().Fastq());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities and three tags are stored, then the
+// sequence/qualities are replaced by empty strings. Both must read back empty
+// while the tags are unaffected.
+TEST(BamRecordImplVariableDataTest, SeqQualTag_ThenOverwriteWithEmptySeq)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string nothing("");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.Tags(tagSet);
+    record.SetSequenceAndQualities(nothing, nothing);
+
+    EXPECT_EQ(nothing, record.Sequence());
+    EXPECT_EQ(nothing, record.Qualities().Fastq());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities plus a two-entry tag collection, which is then
+// replaced by a three-entry one. Sequence and qualities must be intact and the
+// SAM-encoded tags must match the three-entry collection.
+TEST(BamRecordImplVariableDataTest, SeqQualTag_ThenOverwriteWithLongerTags)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+
+    // Two-entry collection: stored first, then overwritten.
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+
+    // Same entries plus "XY": the one that is stored last.
+    TagCollection extendedTagSet;
+    extendedTagSet["HX"] = std::string("1abc75");
+    extendedTagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    extendedTagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    extendedTagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.Tags(tagSet);
+    record.Tags(extendedTagSet);
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities plus a three-entry tag collection, which is
+// then replaced by a two-entry one. Sequence and qualities must be intact and
+// the SAM-encoded tags must match the two-entry collection.
+TEST(BamRecordImplVariableDataTest, SeqQualTag_ThenOverwriteWithShorterTags)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+
+    // Two-entry collection: the one that is stored last.
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+
+    // Same entries plus "XY": stored first, then overwritten.
+    TagCollection extendedTagSet;
+    extendedTagSet["HX"] = std::string("1abc75");
+    extendedTagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    extendedTagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    extendedTagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.Tags(extendedTagSet);
+    record.Tags(tagSet);
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+
+    // Tab-separated SAM text expected for the two remaining tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities plus two tags, after which the tags are
+// replaced by an empty collection. Sequence and qualities must be intact and
+// no tags may remain.
+TEST(BamRecordImplVariableDataTest, SeqQualTag_ThenOverwriteWithEmptyTags)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.Tags(tagSet);
+    record.Tags(TagCollection());   // overwrite with nothing
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(0, record.Tags().size());
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities are stored, then a CIGAR. All three must read
+// back as supplied.
+TEST(BamRecordImplVariableDataTest, SeqQualCigar_Init_Normal)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("100=");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Scenario: empty sequence and qualities are stored, then a CIGAR. The empty
+// strings and the CIGAR must read back as supplied.
+TEST(BamRecordImplVariableDataTest, SeqQualCigar_Init_EmptySeqQual)
+{
+    const std::string bases("");
+    const std::string quals("");
+    const std::string cigarText("100=");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Scenario: a 12-base sequence with empty qualities is stored, then a CIGAR.
+// All three must read back as supplied.
+TEST(BamRecordImplVariableDataTest, SeqQualCigar_Init_EmptyQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("");
+    const std::string cigarText("100=");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities are stored, then an empty CIGAR string.
+// Sequence and qualities must be intact and the CIGAR must read back empty.
+TEST(BamRecordImplVariableDataTest, SeqQualCigar_Init_EmptyCigar)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Scenario: a short sequence/quality pair and a CIGAR are stored, then the
+// sequence/qualities are replaced by a longer pair. The CIGAR must be
+// unaffected and the longer pair reported.
+TEST(BamRecordImplVariableDataTest, SeqQualCigar_ThenOverwriteWithLongerSeq_NormalQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("100=");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("?]?]");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(briefBases, briefQuals);
+    record.CigarData(cigarText);
+    record.SetSequenceAndQualities(bases, quals);   // overwrite with longer data
+
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Scenario: a short sequence/quality pair and a CIGAR are stored, then the
+// sequence is replaced by a longer one with empty qualities. The CIGAR must be
+// unaffected and the new sequence/qualities reported.
+TEST(BamRecordImplVariableDataTest, SeqQualCigar_ThenOverwriteWithLongerSeq_EmptyQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("");
+    const std::string cigarText("100=");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("?]?]");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(briefBases, briefQuals);
+    record.CigarData(cigarText);
+    record.SetSequenceAndQualities(bases, quals);   // overwrite, longer, no quals
+
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Scenario: a long sequence/quality pair and a CIGAR are stored, then the
+// sequence/qualities are replaced by a shorter pair. The CIGAR must be
+// unaffected and the shorter pair reported.
+TEST(BamRecordImplVariableDataTest, SeqQualCigar_ThenOverwriteWithShorterSeq_NormalQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("100=");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("?]?]");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.SetSequenceAndQualities(briefBases, briefQuals);   // overwrite with shorter data
+
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+    EXPECT_EQ(briefBases, record.Sequence());
+    EXPECT_EQ(briefQuals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Scenario: a long sequence/quality pair and a CIGAR are stored, then the
+// sequence is replaced by a shorter one with empty qualities. The CIGAR must
+// be unaffected and the new sequence/qualities reported.
+TEST(BamRecordImplVariableDataTest, SeqQualCigar_ThenOverwriteWithShorterSeq_EmptyQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("100=");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.SetSequenceAndQualities(briefBases, briefQuals);   // overwrite, shorter, no quals
+
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+    EXPECT_EQ(briefBases, record.Sequence());
+    EXPECT_EQ(briefQuals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities and a CIGAR are stored, then the
+// sequence/qualities are replaced by empty strings. Both must read back empty
+// while the CIGAR is unaffected.
+TEST(BamRecordImplVariableDataTest, SeqQualCigar_ThenOverwriteWithEmptySeq)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("100=");
+    const std::string nothing("");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.SetSequenceAndQualities(nothing, nothing);
+
+    EXPECT_EQ(nothing, record.Sequence());
+    EXPECT_EQ(nothing, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities and a one-operation CIGAR are stored, then the
+// CIGAR is replaced by a five-operation one. Sequence and qualities must be
+// intact and the longer CIGAR reported.
+TEST(BamRecordImplVariableDataTest, SeqQualCigar_ThenOverwriteWithLongerCigar)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("100=");
+    const std::string extendedCigarText("100=10D100=10I100X");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.CigarData(extendedCigarText);   // overwrite with longer CIGAR
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(extendedCigarText, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities and a five-operation CIGAR are stored, then the
+// CIGAR is replaced by a one-operation one. Sequence and qualities must be
+// intact and the shorter CIGAR reported.
+TEST(BamRecordImplVariableDataTest, SeqQualCigar_ThenOverwriteWithShorterCigar)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("100=");
+    const std::string extendedCigarText("100=10D100=10I100X");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(extendedCigarText);
+    record.CigarData(cigarText);   // overwrite with shorter CIGAR
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities and a CIGAR are stored, then the CIGAR is
+// replaced by an empty string. Sequence and qualities must be intact and the
+// CIGAR must read back empty.
+TEST(BamRecordImplVariableDataTest, SeqQualCigar_ThenOverwriteWithEmptyCigar)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("100=");
+    const std::string nothing("");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.CigarData(nothing);   // overwrite with empty CIGAR
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(nothing, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities, a CIGAR and three tags are stored in that
+// order. Every field must read back as supplied.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_Init_Normal)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("100=");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(tagSet);
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: empty sequence/qualities, a CIGAR and three tags are stored in
+// that order. Every field must read back as supplied.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_Init_EmptySeqQual)
+{
+    const std::string bases("");
+    const std::string quals("");
+    const std::string cigarText("100=");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(tagSet);
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: a 12-base sequence with empty qualities, a CIGAR and three tags
+// are stored in that order. Every field must read back as supplied.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_Init_EmptyQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("");
+    const std::string cigarText("100=");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(tagSet);
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities, an empty CIGAR and three tags are stored in
+// that order. Every field must read back as supplied.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_Init_EmptyCigar)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(tagSet);
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities, a CIGAR and an empty tag collection are stored
+// in that order. Sequence, qualities and CIGAR must be intact and no tags
+// present.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_Init_EmptyTag)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("100=");
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(TagCollection());
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+    EXPECT_EQ(0, record.Tags().size());
+    tests::CheckRawData(record);
+}
+
+// Scenario: a short sequence/quality pair, a CIGAR and three tags are stored,
+// then the sequence/qualities are replaced by a longer pair. The longer pair
+// must be reported while CIGAR and tags are unaffected.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_ThenOverwriteWithLongerSeq_NormalQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("100=");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("?]?]");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(briefBases, briefQuals);
+    record.CigarData(cigarText);
+    record.Tags(tagSet);
+    record.SetSequenceAndQualities(bases, quals);   // overwrite with longer data
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: a short sequence/quality pair, a CIGAR and three tags are stored,
+// then the sequence is replaced by a longer one with empty qualities. The new
+// sequence/qualities must be reported while CIGAR and tags are unaffected.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_ThenOverwriteWithLongerSeq_EmptyQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("");
+    const std::string cigarText("100=");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("?]?]");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(briefBases, briefQuals);
+    record.CigarData(cigarText);
+    record.Tags(tagSet);
+    record.SetSequenceAndQualities(bases, quals);   // overwrite, longer, no quals
+
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: a long sequence/quality pair, a CIGAR and three tags are stored,
+// then the sequence/qualities are replaced by a shorter pair. The shorter pair
+// must be reported while CIGAR and tags are unaffected.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_ThenOverwriteWithShorterSeq_NormalQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("100=");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("?]?]");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(tagSet);
+    record.SetSequenceAndQualities(briefBases, briefQuals);   // overwrite with shorter data
+
+    EXPECT_EQ(briefBases, record.Sequence());
+    EXPECT_EQ(briefQuals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: a long sequence/quality pair, a CIGAR and three tags are stored,
+// then the sequence is replaced by a shorter one with empty qualities. The new
+// sequence/qualities must be reported while CIGAR and tags are unaffected.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_ThenOverwriteWithShorterSeq_EmptyQual)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("100=");
+    const std::string briefBases("ACGT");
+    const std::string briefQuals("");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(tagSet);
+    record.SetSequenceAndQualities(briefBases, briefQuals);   // overwrite, shorter, no quals
+
+    EXPECT_EQ(briefBases, record.Sequence());
+    EXPECT_EQ(briefQuals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Scenario: sequence/qualities, a CIGAR and three tags are stored, then the
+// sequence/qualities are replaced by empty strings. Both must read back empty
+// while CIGAR and tags are unaffected.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_ThenOverwriteWithEmptySeq)
+{
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string cigarText("100=");
+    const std::string nothing("");
+
+    TagCollection tagSet;
+    tagSet["HX"] = std::string("1abc75");
+    tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+    tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+    tagSet["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(tagSet);
+    record.SetSequenceAndQualities(nothing, nothing);
+
+    EXPECT_EQ(nothing, record.Sequence());
+    EXPECT_EQ(nothing, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tab-separated SAM text expected for the three tags.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+
+    const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+    EXPECT_EQ(expectedSam, encodedTags);
+
+    tests::CheckRawData(record);
+}
+
+// Sets SEQ/QUAL, CIGAR and tags, then replaces the CIGAR with a longer one.
+// SEQ/QUAL and tags must read back unchanged.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_ThenOverwriteWithLongerCigar)
+{
+    const std::string sequence = "ACGTACGTACGT";
+    const std::string qualities = "?]?]?]?]?]?]";
+    const std::string cigar = "100=";
+    const std::string longerCigar = "100=10D100=10I100X";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.SetSequenceAndQualities(sequence, qualities);
+    bam.CigarData(cigar);
+    bam.Tags(tags);
+    bam.CigarData(longerCigar);
+
+    EXPECT_EQ(sequence, bam.Sequence());
+    EXPECT_EQ(qualities, bam.Qualities().Fastq());
+    EXPECT_EQ(longerCigar, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets SEQ/QUAL, a long CIGAR and tags, then replaces the CIGAR with a shorter
+// one. SEQ/QUAL and tags must read back unchanged.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_ThenOverwriteWithShorterCigar)
+{
+    const std::string sequence = "ACGTACGTACGT";
+    const std::string qualities = "?]?]?]?]?]?]";
+    const std::string cigar = "100=";
+    const std::string longerCigar = "100=10D100=10I100X";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.SetSequenceAndQualities(sequence, qualities);
+    bam.CigarData(longerCigar);
+    bam.Tags(tags);
+    bam.CigarData(cigar);
+
+    EXPECT_EQ(sequence, bam.Sequence());
+    EXPECT_EQ(qualities, bam.Qualities().Fastq());
+    EXPECT_EQ(cigar, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets SEQ/QUAL, CIGAR and tags, then replaces the CIGAR with an empty string.
+// SEQ/QUAL and tags must read back unchanged.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_ThenOverwriteWithEmptyCigar)
+{
+    const std::string sequence = "ACGTACGTACGT";
+    const std::string qualities = "?]?]?]?]?]?]";
+    const std::string cigar = "100=";
+    const std::string empty = "";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.SetSequenceAndQualities(sequence, qualities);
+    bam.CigarData(cigar);
+    bam.Tags(tags);
+    bam.CigarData(empty);
+
+    EXPECT_EQ(sequence, bam.Sequence());
+    EXPECT_EQ(qualities, bam.Qualities().Fastq());
+    EXPECT_EQ(empty, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets SEQ/QUAL, CIGAR and a two-entry tag set, then replaces the tags with a
+// three-entry set. SEQ/QUAL and CIGAR must read back unchanged.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_ThenOverwriteWithLongerTags)
+{
+    const std::string sequence = "ACGTACGTACGT";
+    const std::string qualities = "?]?]?]?]?]?]";
+    const std::string cigar = "100=";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+
+    TagCollection longerTags;
+    longerTags["HX"] = std::string("1abc75");
+    longerTags["HX"].Modifier(TagModifier::HEX_STRING);
+    longerTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    longerTags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.SetSequenceAndQualities(sequence, qualities);
+    bam.CigarData(cigar);
+    bam.Tags(tags);
+    bam.Tags(longerTags);
+
+    EXPECT_EQ(sequence, bam.Sequence());
+    EXPECT_EQ(qualities, bam.Qualities().Fastq());
+    EXPECT_EQ(cigar, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the longer tag set, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets SEQ/QUAL, CIGAR and a three-entry tag set, then replaces the tags with
+// a two-entry set. SEQ/QUAL and CIGAR must read back unchanged.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_ThenOverwriteWithShorterTags)
+{
+    const std::string sequence = "ACGTACGTACGT";
+    const std::string qualities = "?]?]?]?]?]?]";
+    const std::string cigar = "100=";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+
+    TagCollection longerTags;
+    longerTags["HX"] = std::string("1abc75");
+    longerTags["HX"].Modifier(TagModifier::HEX_STRING);
+    longerTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    longerTags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.SetSequenceAndQualities(sequence, qualities);
+    bam.CigarData(cigar);
+    bam.Tags(longerTags);
+    bam.Tags(tags);
+
+    EXPECT_EQ(sequence, bam.Sequence());
+    EXPECT_EQ(qualities, bam.Qualities().Fastq());
+    EXPECT_EQ(cigar, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the shorter tag set, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets SEQ/QUAL, CIGAR and tags, then replaces the tags with an empty
+// collection. SEQ/QUAL and CIGAR must read back unchanged and no tags remain.
+TEST(BamRecordImplVariableDataTest, SeqQualCigarTag_ThenOverwriteWithEmptyTags)
+{
+    const std::string sequence = "ACGTACGTACGT";
+    const std::string qualities = "?]?]?]?]?]?]";
+    const std::string cigar = "100=";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.SetSequenceAndQualities(sequence, qualities);
+    bam.CigarData(cigar);
+    bam.Tags(tags);
+    bam.Tags(TagCollection());
+
+    EXPECT_EQ(sequence, bam.Sequence());
+    EXPECT_EQ(qualities, bam.Qualities().Fastq());
+    EXPECT_EQ(cigar, bam.CigarData().ToStdString());
+    // Unsigned literal avoids a signed/unsigned comparison against size().
+    EXPECT_EQ(0u, bam.Tags().size());
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Assigns an empty name to a fresh record and expects Name() to be empty.
+TEST(BamRecordImplVariableDataTest, NameOnly_InitEmpty)
+{
+    BamRecordImpl bam;
+    bam.Name(std::string());
+    // Unsigned literal avoids a signed/unsigned comparison against size().
+    EXPECT_EQ(0u, bam.Name().size());
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Stores a name on a fresh record and expects Name() to return it.
+TEST(BamRecordImplVariableDataTest, NameOnly_InitNormal)
+{
+    const std::string expectedName("foo");
+
+    BamRecordImpl record;
+    record.Name(expectedName);
+
+    EXPECT_EQ(expectedName, record.Name());
+    tests::CheckRawData(record);
+}
+
+// Sets a short name, then replaces it with a longer one; Name() must return
+// the longer name.
+TEST(BamRecordImplVariableDataTest, NameOnly_ThenOverwriteWithLongerName)
+{
+    const std::string shortName("foo");
+    const std::string longName("this is a long read name");
+
+    BamRecordImpl record;
+    record.Name(shortName);
+    record.Name(longName);
+
+    EXPECT_EQ(longName, record.Name());
+    tests::CheckRawData(record);
+}
+
+// Sets a long name, then replaces it with a shorter one; Name() must return
+// the shorter name.
+TEST(BamRecordImplVariableDataTest, NameOnly_ThenOverwriteWithShorterName)
+{
+    const std::string shortName("foo");
+    const std::string longName("this is a long read name");
+
+    BamRecordImpl record;
+    record.Name(longName);
+    record.Name(shortName);
+
+    EXPECT_EQ(shortName, record.Name());
+    tests::CheckRawData(record);
+}
+
+// Sets a name, then replaces it with an empty string; Name() must be empty.
+TEST(BamRecordImplVariableDataTest, NameOnly_ThenOverwriteWithEmptyName)
+{
+    const std::string initialName("foo");
+    const std::string blankName("");
+
+    BamRecordImpl record;
+    record.Name(initialName);
+    record.Name(blankName);
+
+    EXPECT_EQ(blankName, record.Name());
+    tests::CheckRawData(record);
+}
+
+// Sets a name and tags on a fresh record; both must read back as stored.
+TEST(BamRecordImplVariableDataTest, NameTag_Init_Normal)
+{
+    const std::string readName = "foo";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.Tags(tags);
+
+    EXPECT_EQ(readName, bam.Name());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets an empty name and a tag set on a fresh record; both must read back as
+// stored.
+TEST(BamRecordImplVariableDataTest, NameTag_Init_EmptyName)
+{
+    const std::string readName = "";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.Tags(tags);
+
+    EXPECT_EQ(readName, bam.Name());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name and an empty tag collection; the name must read back and the
+// record must report zero tags.
+TEST(BamRecordImplVariableDataTest, NameTag_Init_EmptyTag)
+{
+    const std::string readName = "foo";
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.Tags(TagCollection());
+
+    EXPECT_EQ(readName, bam.Name());
+    // Unsigned literal avoids a signed/unsigned comparison against size().
+    EXPECT_EQ(0u, bam.Tags().size());
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name and tags, then replaces the name with a longer one. The tags
+// must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameTag_ThenOverwriteWithLongerName)
+{
+    const std::string readName = "foo";
+    const std::string longerName = "this is a long read name";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.Tags(tags);
+    bam.Name(longerName);
+
+    EXPECT_EQ(longerName, bam.Name());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a long name and tags, then replaces the name with a shorter one. The
+// tags must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameTag_ThenOverwriteWithShorterName)
+{
+    const std::string readName = "foo";
+    const std::string longerName = "this is a long read name";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(longerName);
+    bam.Tags(tags);
+    bam.Name(readName);
+
+    EXPECT_EQ(readName, bam.Name());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name and tags, then replaces the name with an empty string. The tags
+// must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameTag_ThenOverwriteWithEmptyName)
+{
+    const std::string readName = "foo";
+    const std::string empty = "";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.Tags(tags);
+    bam.Name(empty);
+
+    EXPECT_EQ(empty, bam.Name());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name and a two-entry tag set, then replaces the tags with a
+// three-entry set. The name must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameTag_ThenOverwriteWithLongerTags)
+{
+    const std::string readName = "foo";
+
+    TagCollection longerTags;
+    longerTags["HX"] = std::string("1abc75");
+    longerTags["HX"].Modifier(TagModifier::HEX_STRING);
+    longerTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    longerTags["XY"] = static_cast<int32_t>(-42);
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.Tags(tags);
+    bam.Tags(longerTags);
+
+    EXPECT_EQ(readName, bam.Name());
+
+    // Expected SAM-text encoding of the longer tag set, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name and a three-entry tag set, then replaces the tags with a
+// two-entry set. The name must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameTag_ThenOverwriteWithShorterTags)
+{
+    const std::string readName = "foo";
+
+    TagCollection longerTags;
+    longerTags["HX"] = std::string("1abc75");
+    longerTags["HX"].Modifier(TagModifier::HEX_STRING);
+    longerTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    longerTags["XY"] = static_cast<int32_t>(-42);
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.Tags(longerTags);
+    bam.Tags(tags);
+
+    EXPECT_EQ(readName, bam.Name());
+
+    // Expected SAM-text encoding of the shorter tag set, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name and tags, then replaces the tags with an empty collection. The
+// name must read back unchanged and no tags remain.
+TEST(BamRecordImplVariableDataTest, NameTag_ThenOverwriteWithEmptyTags)
+{
+    const std::string readName = "foo";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.Tags(tags);
+    bam.Tags(TagCollection());
+
+    EXPECT_EQ(readName, bam.Name());
+    // Unsigned literal avoids a signed/unsigned comparison against size().
+    EXPECT_EQ(0u, bam.Tags().size());
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Stores a name and a CIGAR on a fresh record; both must read back as stored.
+TEST(BamRecordImplVariableDataTest, NameCigar_Init_Normal)
+{
+    const std::string expectedName("foo");
+    const std::string expectedCigar("100=");
+
+    BamRecordImpl record;
+    record.Name(expectedName);
+    record.CigarData(expectedCigar);
+
+    EXPECT_EQ(expectedName, record.Name());
+    EXPECT_EQ(expectedCigar, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Stores an empty name and a CIGAR on a fresh record; both must read back as
+// stored.
+TEST(BamRecordImplVariableDataTest, NameCigar_Init_EmptyName)
+{
+    const std::string blankName("");
+    const std::string expectedCigar("100=");
+
+    BamRecordImpl record;
+    record.Name(blankName);
+    record.CigarData(expectedCigar);
+
+    EXPECT_EQ(blankName, record.Name());
+    EXPECT_EQ(expectedCigar, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Stores a name and an empty CIGAR on a fresh record; both must read back as
+// stored.
+TEST(BamRecordImplVariableDataTest, NameCigar_Init_EmptyCigar)
+{
+    const std::string expectedName("foo");
+    const std::string blankCigar("");
+
+    BamRecordImpl record;
+    record.Name(expectedName);
+    record.CigarData(blankCigar);
+
+    EXPECT_EQ(expectedName, record.Name());
+    EXPECT_EQ(blankCigar, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Sets a name and CIGAR, then replaces the name with a longer one. The CIGAR
+// must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameCigar_ThenOverwriteWithLongerName)
+{
+    const std::string shortName("foo");
+    const std::string expectedCigar("100=");
+    const std::string longName("this is a long read name");
+
+    BamRecordImpl record;
+    record.Name(shortName);
+    record.CigarData(expectedCigar);
+    record.Name(longName);
+
+    EXPECT_EQ(longName, record.Name());
+    EXPECT_EQ(expectedCigar, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Sets a long name and CIGAR, then replaces the name with a shorter one. The
+// CIGAR must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameCigar_ThenOverwriteWithShorterName)
+{
+    const std::string shortName("foo");
+    const std::string expectedCigar("100=");
+    const std::string longName("this is a long read name");
+
+    BamRecordImpl record;
+    record.Name(longName);
+    record.CigarData(expectedCigar);
+    record.Name(shortName);
+
+    EXPECT_EQ(shortName, record.Name());
+    EXPECT_EQ(expectedCigar, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Sets a name and CIGAR, then replaces the name with an empty string. The
+// CIGAR must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameCigar_ThenOverwriteWithEmptyName)
+{
+    const std::string initialName("foo");
+    const std::string expectedCigar("100=");
+    const std::string blankName("");
+
+    BamRecordImpl record;
+    record.Name(initialName);
+    record.CigarData(expectedCigar);
+    record.Name(blankName);
+
+    EXPECT_EQ(blankName, record.Name());
+    EXPECT_EQ(expectedCigar, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Sets a name and CIGAR, then replaces the CIGAR with a longer one. The name
+// must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameCigar_ThenOverwriteWithLongerCigar)
+{
+    const std::string expectedName("foo");
+    const std::string shortCigar("100=");
+    const std::string longCigar("100=10D100=10I100X");
+
+    BamRecordImpl record;
+    record.Name(expectedName);
+    record.CigarData(shortCigar);
+    record.CigarData(longCigar);
+
+    EXPECT_EQ(expectedName, record.Name());
+    EXPECT_EQ(longCigar, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Sets a name and a long CIGAR, then replaces the CIGAR with a shorter one.
+// The name must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameCigar_ThenOverwriteWithShorterCigar)
+{
+    const std::string expectedName("foo");
+    const std::string shortCigar("100=");
+    const std::string longCigar("100=10D100=10I100X");
+
+    BamRecordImpl record;
+    record.Name(expectedName);
+    record.CigarData(longCigar);
+    record.CigarData(shortCigar);
+
+    EXPECT_EQ(expectedName, record.Name());
+    EXPECT_EQ(shortCigar, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Sets a name and CIGAR, then replaces the CIGAR with an empty string. The
+// name must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameCigar_ThenOverwriteWithEmptyCigar)
+{
+    const std::string expectedName("foo");
+    const std::string initialCigar("100=");
+    const std::string blankCigar("");
+
+    BamRecordImpl record;
+    record.Name(expectedName);
+    record.CigarData(initialCigar);
+    record.CigarData(blankCigar);
+
+    EXPECT_EQ(expectedName, record.Name());
+    EXPECT_EQ(blankCigar, record.CigarData().ToStdString());
+    tests::CheckRawData(record);
+}
+
+// Sets a name, CIGAR and tags on a fresh record; all must read back as stored.
+TEST(BamRecordImplVariableDataTest, NameCigarTag_Init_Normal)
+{
+    const std::string readName = "foo";
+    const std::string cigar = "100=";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.CigarData(cigar);
+    bam.Tags(tags);
+
+    EXPECT_EQ(readName, bam.Name());
+    EXPECT_EQ(cigar, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets an empty name, a CIGAR and tags on a fresh record; all must read back
+// as stored.
+TEST(BamRecordImplVariableDataTest, NameCigarTag_Init_EmptyName)
+{
+    const std::string readName = "";
+    const std::string cigar = "100=";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.CigarData(cigar);
+    bam.Tags(tags);
+
+    EXPECT_EQ(readName, bam.Name());
+    EXPECT_EQ(cigar, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name, an empty CIGAR and tags on a fresh record; all must read back
+// as stored.
+TEST(BamRecordImplVariableDataTest, NameCigarTag_Init_EmptyCigar)
+{
+    const std::string readName = "foo";
+    const std::string cigar = "";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.CigarData(cigar);
+    bam.Tags(tags);
+
+    EXPECT_EQ(readName, bam.Name());
+    EXPECT_EQ(cigar, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name, a CIGAR and an empty tag collection; name and CIGAR must read
+// back and the record must report zero tags.
+TEST(BamRecordImplVariableDataTest, NameCigarTag_Init_EmptyTag)
+{
+    const std::string readName = "foo";
+    const std::string cigar = "100=";
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.CigarData(cigar);
+    bam.Tags(TagCollection());
+
+    EXPECT_EQ(readName, bam.Name());
+    EXPECT_EQ(cigar, bam.CigarData().ToStdString());
+    // Unsigned literal avoids a signed/unsigned comparison against size().
+    EXPECT_EQ(0u, bam.Tags().size());
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name, CIGAR and tags, then replaces the name with a longer one. The
+// CIGAR and tags must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameCigarTag_ThenOverwriteWithLongerName)
+{
+    const std::string readName = "foo";
+    const std::string cigar = "100=";
+    const std::string longerName = "this is a long read name";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.CigarData(cigar);
+    bam.Tags(tags);
+    bam.Name(longerName);
+
+    EXPECT_EQ(longerName, bam.Name());
+    EXPECT_EQ(cigar, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a long name, CIGAR and tags, then replaces the name with a shorter one.
+// The CIGAR and tags must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameCigarTag_ThenOverwriteWithShorterName)
+{
+    const std::string readName = "foo";
+    const std::string cigar = "100=";
+    const std::string longerName = "this is a long read name";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(longerName);
+    bam.CigarData(cigar);
+    bam.Tags(tags);
+    bam.Name(readName);
+
+    EXPECT_EQ(readName, bam.Name());
+    EXPECT_EQ(cigar, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name, CIGAR and tags, then replaces the name with an empty string.
+// The CIGAR and tags must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameCigarTag_ThenOverwriteWithEmptyName)
+{
+    const std::string readName = "foo";
+    const std::string cigar = "100=";
+    const std::string empty = "";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.CigarData(cigar);
+    bam.Tags(tags);
+    bam.Name(empty);
+
+    EXPECT_EQ(empty, bam.Name());
+    EXPECT_EQ(cigar, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name, CIGAR and tags, then replaces the CIGAR with a longer one. The
+// name and tags must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameCigarTag_ThenOverwriteWithLongerCigar)
+{
+    const std::string readName = "foo";
+    const std::string cigar = "100=";
+    const std::string longerCigar = "100=10D100=10I100X";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.CigarData(cigar);
+    bam.Tags(tags);
+    bam.CigarData(longerCigar);
+
+    EXPECT_EQ(readName, bam.Name());
+    EXPECT_EQ(longerCigar, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name, a long CIGAR and tags, then replaces the CIGAR with a shorter
+// one. The name and tags must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameCigarTag_ThenOverwriteWithShorterCigar)
+{
+    const std::string readName = "foo";
+    const std::string cigar = "100=";
+    const std::string longerCigar = "100=10D100=10I100X";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.CigarData(longerCigar);
+    bam.Tags(tags);
+    bam.CigarData(cigar);
+
+    EXPECT_EQ(readName, bam.Name());
+    EXPECT_EQ(cigar, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name, CIGAR and tags, then replaces the CIGAR with an empty string.
+// The name and tags must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameCigarTag_ThenOverwriteWithEmptyCigar)
+{
+    const std::string readName = "foo";
+    const std::string cigar = "100=";
+    const std::string empty = "";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.CigarData(cigar);
+    bam.Tags(tags);
+    bam.CigarData(empty);
+
+    EXPECT_EQ(readName, bam.Name());
+    EXPECT_EQ(empty, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the tags, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name, CIGAR and a two-entry tag set, then replaces the tags with a
+// three-entry set. The name and CIGAR must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameCigarTag_ThenOverwriteWithLongerTags)
+{
+    const std::string readName = "foo";
+    const std::string cigar = "100=";
+
+    TagCollection longerTags;
+    longerTags["HX"] = std::string("1abc75");
+    longerTags["HX"].Modifier(TagModifier::HEX_STRING);
+    longerTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    longerTags["XY"] = static_cast<int32_t>(-42);
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.CigarData(cigar);
+    bam.Tags(tags);
+    bam.Tags(longerTags);
+
+    EXPECT_EQ(readName, bam.Name());
+    EXPECT_EQ(cigar, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the longer tag set, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+    expected.append("\t");
+    expected.append("XY:i:-42");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name, CIGAR and a three-entry tag set, then replaces the tags with a
+// two-entry set. The name and CIGAR must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameCigarTag_ThenOverwriteWithShorterTags)
+{
+    const std::string readName = "foo";
+    const std::string cigar = "100=";
+
+    TagCollection longerTags;
+    longerTags["HX"] = std::string("1abc75");
+    longerTags["HX"].Modifier(TagModifier::HEX_STRING);
+    longerTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    longerTags["XY"] = static_cast<int32_t>(-42);
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.CigarData(cigar);
+    bam.Tags(longerTags);
+    bam.Tags(tags);
+
+    EXPECT_EQ(readName, bam.Name());
+    EXPECT_EQ(cigar, bam.CigarData().ToStdString());
+
+    // Expected SAM-text encoding of the shorter tag set, tab-separated.
+    std::string expected;
+    expected.append("CA:B:C,34,5,125");
+    expected.append("\t");
+    expected.append("HX:H:1abc75");
+
+    const std::string sam = SamTagCodec::Encode(bam.Tags());
+    EXPECT_EQ(expected, sam);
+
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Sets a name, CIGAR and tags, then replaces the tags with an empty
+// collection. The name and CIGAR must read back unchanged and no tags remain.
+TEST(BamRecordImplVariableDataTest, NameCigarTag_ThenOverwriteWithEmptyTags)
+{
+    const std::string readName = "foo";
+    const std::string cigar = "100=";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.CigarData(cigar);
+    bam.Tags(tags);
+    bam.Tags(TagCollection());
+
+    EXPECT_EQ(readName, bam.Name());
+    EXPECT_EQ(cigar, bam.CigarData().ToStdString());
+    // Unsigned literal avoids a signed/unsigned comparison against size().
+    EXPECT_EQ(0u, bam.Tags().size());
+    // Additional record checks live in the shared helper (defined elsewhere).
+    tests::CheckRawData(bam);
+}
+
+// Stores a name plus sequence/qualities on a fresh record; all must read back
+// as stored.
+TEST(BamRecordImplVariableDataTest, NameSeqQual_Init_Normal)
+{
+    const std::string expectedName("foo");
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+
+    BamRecordImpl record;
+    record.Name(expectedName);
+    record.SetSequenceAndQualities(bases, quals);
+
+    EXPECT_EQ(expectedName, record.Name());
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Stores a name plus empty sequence and qualities on a fresh record; all must
+// read back as stored.
+TEST(BamRecordImplVariableDataTest, NameSeqQual_Init_EmptySeqQual)
+{
+    const std::string expectedName("foo");
+    const std::string bases("");
+    const std::string quals("");
+
+    BamRecordImpl record;
+    record.Name(expectedName);
+    record.SetSequenceAndQualities(bases, quals);
+
+    EXPECT_EQ(expectedName, record.Name());
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Stores a name plus a sequence with empty qualities on a fresh record; all
+// must read back as stored.
+TEST(BamRecordImplVariableDataTest, NameSeqQual_Init_EmptyQual)
+{
+    const std::string expectedName("foo");
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("");
+
+    BamRecordImpl record;
+    record.Name(expectedName);
+    record.SetSequenceAndQualities(bases, quals);
+
+    EXPECT_EQ(expectedName, record.Name());
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Sets a name and sequence/qualities, then replaces the name with a longer
+// one. Sequence and qualities must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQual_ThenOverwriteWithLongerName)
+{
+    const std::string shortName("foo");
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string longName("this is a long read name");
+
+    BamRecordImpl record;
+    record.Name(shortName);
+    record.SetSequenceAndQualities(bases, quals);
+    record.Name(longName);
+
+    EXPECT_EQ(longName, record.Name());
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Sets a long name and sequence/qualities, then replaces the name with a
+// shorter one. Sequence and qualities must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQual_ThenOverwriteWithShorterName)
+{
+    const std::string shortName("foo");
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string longName("this is a long read name");
+
+    BamRecordImpl record;
+    record.Name(longName);
+    record.SetSequenceAndQualities(bases, quals);
+    record.Name(shortName);
+
+    EXPECT_EQ(shortName, record.Name());
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Sets a name and sequence/qualities, then replaces the name with an empty
+// string. Sequence and qualities must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQual_ThenOverwriteWithEmptyName)
+{
+    const std::string initialName("foo");
+    const std::string bases("ACGTACGTACGT");
+    const std::string quals("?]?]?]?]?]?]");
+    const std::string blankName("");
+
+    BamRecordImpl record;
+    record.Name(initialName);
+    record.SetSequenceAndQualities(bases, quals);
+    record.Name(blankName);
+
+    EXPECT_EQ(blankName, record.Name());
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Sets a name and a short sequence/qualities pair, then replaces the pair
+// with a longer one. The name must read back unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQual_ThenOverwriteWithLongerSeq_NormalQual)
+{
+    const std::string expectedName("foo");
+    const std::string longBases("ACGTACGTACGT");
+    const std::string longQuals("?]?]?]?]?]?]");
+    const std::string shortBases("ACGT");
+    const std::string shortQuals("?]?]");
+
+    BamRecordImpl record;
+    record.Name(expectedName);
+    record.SetSequenceAndQualities(shortBases, shortQuals);
+    record.SetSequenceAndQualities(longBases, longQuals);
+
+    EXPECT_EQ(expectedName, record.Name());
+    EXPECT_EQ(longBases, record.Sequence());
+    EXPECT_EQ(longQuals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Sets a name and a short sequence/qualities pair, then replaces the pair
+// with a longer sequence and empty qualities. The name must read back
+// unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQual_ThenOverwriteWithLongerSeq_EmptyQual)
+{
+    const std::string expectedName("foo");
+    const std::string longBases("ACGTACGTACGT");
+    const std::string noQuals("");
+    const std::string shortBases("ACGT");
+    const std::string shortQuals("?]?]");
+
+    BamRecordImpl record;
+    record.Name(expectedName);
+    record.SetSequenceAndQualities(shortBases, shortQuals);
+    record.SetSequenceAndQualities(longBases, noQuals);
+
+    EXPECT_EQ(expectedName, record.Name());
+    EXPECT_EQ(longBases, record.Sequence());
+    EXPECT_EQ(noQuals, record.Qualities().Fastq());
+    tests::CheckRawData(record);
+}
+
+// Checks that replacing a long sequence/quality pair with a shorter pair
+// yields the shorter data and keeps the read name.
+TEST(BamRecordImplVariableDataTest, NameSeqQual_ThenOverwriteWithShorterSeq_NormalQual)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string fewerBases{"ACGT"};
+ const std::string fewerQuals{"?]?]"};
+
+ // Long data first, then shrink it.
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.SetSequenceAndQualities(fewerBases, fewerQuals);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(fewerBases, record.Sequence());
+ EXPECT_EQ(fewerQuals, record.Qualities().Fastq());
+ tests::CheckRawData(record);
+}
+
+// Checks that replacing a long sequence/quality pair with a shorter sequence
+// and an empty quality string yields the shorter sequence and empty qualities.
+TEST(BamRecordImplVariableDataTest, NameSeqQual_ThenOverwriteWithShorterSeq_EmptyQual)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string fewerBases{"ACGT"};
+ const std::string fewerQuals{""};
+
+ // Long data first, then a shorter sequence with no qualities.
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.SetSequenceAndQualities(fewerBases, fewerQuals);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(fewerBases, record.Sequence());
+ EXPECT_EQ(fewerQuals, record.Qualities().Fastq());
+ tests::CheckRawData(record);
+}
+
+// Checks that overwriting sequence and qualities with empty strings clears
+// both while the read name is kept.
+TEST(BamRecordImplVariableDataTest, NameSeqQual_ThenOverwriteWithEmptySeq)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string blank{""};
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.SetSequenceAndQualities(blank, blank);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(blank, record.Sequence());
+ EXPECT_EQ(blank, record.Qualities().Fastq());
+ tests::CheckRawData(record);
+}
+
+// Checks that a record populated with name, sequence/qualities and three tags
+// returns each of them unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_Init_Normal)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.Tags(tagSet);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that a record with an empty name but populated sequence/qualities
+// and tags returns each of them unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_Init_EmptyName)
+{
+ const std::string name{""};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.Tags(tagSet);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that a record with a name and tags but empty sequence and qualities
+// returns each of them unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_Init_EmptySeqQual)
+{
+ const std::string name{"foo"};
+ const std::string bases{""};
+ const std::string quals{""};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.Tags(tagSet);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that a record with name, sequence and tags but an empty quality
+// string returns each of them unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_Init_EmptyQual)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{""};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.Tags(tagSet);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that assigning an empty TagCollection to a record that already has a
+// name and sequence/qualities leaves those fields intact and reports zero tags.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_Init_EmptyTag)
+{
+ const std::string readName = "foo";
+ const std::string sequence = "ACGTACGTACGT";
+ const std::string qualities = "?]?]?]?]?]?]";
+
+ BamRecordImpl bam;
+ bam.Name(readName);
+ bam.SetSequenceAndQualities(sequence, qualities);
+ bam.Tags(TagCollection());
+
+ EXPECT_EQ(readName, bam.Name());
+ EXPECT_EQ(sequence, bam.Sequence());
+ EXPECT_EQ(qualities, bam.Qualities().Fastq());
+ // Unsigned literal: avoids comparing a signed int against the container's
+ // size inside EXPECT_EQ (assumes size() returns an unsigned type, as the
+ // standard containers do).
+ EXPECT_EQ(0u, bam.Tags().size());
+
+ tests::CheckRawData(bam);
+}
+
+// Checks that replacing the read name with a longer one, after sequence,
+// qualities and tags are stored, leaves those later fields unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_ThenOverwriteWithLongerName)
+{
+ const std::string name{"foo"};
+ const std::string longName{"this is a long read name"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ // Fill everything with the short name, then grow the name last.
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.Tags(tagSet);
+ record.Name(longName);
+
+ EXPECT_EQ(longName, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that replacing a long read name with a shorter one, after sequence,
+// qualities and tags are stored, leaves those later fields unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_ThenOverwriteWithShorterName)
+{
+ const std::string longName{"this is a long read name"};
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ // Fill everything with the long name, then shrink the name last.
+ BamRecordImpl record;
+ record.Name(longName);
+ record.SetSequenceAndQualities(bases, quals);
+ record.Tags(tagSet);
+ record.Name(name);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that overwriting the read name with an empty string, after sequence,
+// qualities and tags are stored, leaves those later fields unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_ThenOverwriteWithEmptyName)
+{
+ const std::string name{"foo"};
+ const std::string blank{""};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.Tags(tagSet);
+ record.Name(blank);
+
+ EXPECT_EQ(blank, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that growing the sequence/qualities after tags have been stored
+// yields the longer data and leaves the name and tags unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_ThenOverwriteWithLongerSeq_NormalQual)
+{
+ const std::string name{"foo"};
+ const std::string fewerBases{"ACGT"};
+ const std::string fewerQuals{"?]?]"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ // Short seq/qual and tags first, then the longer seq/qual.
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(fewerBases, fewerQuals);
+ record.Tags(tagSet);
+ record.SetSequenceAndQualities(bases, quals);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that replacing a short seq/qual pair with a longer sequence and empty
+// qualities, after tags have been stored, leaves the name and tags unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_ThenOverwriteWithLongerSeq_EmptyQual)
+{
+ const std::string name{"foo"};
+ const std::string fewerBases{"ACGT"};
+ const std::string fewerQuals{"?]?]"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{""};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ // Short seq/qual and tags first, then the longer sequence without quals.
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(fewerBases, fewerQuals);
+ record.Tags(tagSet);
+ record.SetSequenceAndQualities(bases, quals);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that shrinking the sequence/qualities after tags have been stored
+// yields the shorter data and leaves the name and tags unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_ThenOverwriteWithShorterSeq_NormalQual)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string fewerBases{"ACGT"};
+ const std::string fewerQuals{"?]?]"};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ // Long seq/qual and tags first, then the shorter seq/qual.
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.Tags(tagSet);
+ record.SetSequenceAndQualities(fewerBases, fewerQuals);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(fewerBases, record.Sequence());
+ EXPECT_EQ(fewerQuals, record.Qualities().Fastq());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that replacing a long seq/qual pair with a shorter sequence and empty
+// qualities, after tags have been stored, leaves the name and tags unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_ThenOverwriteWithShorterSeq_EmptyQual)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string fewerBases{"ACGT"};
+ const std::string fewerQuals{""};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ // Long seq/qual and tags first, then the shorter sequence without quals.
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.Tags(tagSet);
+ record.SetSequenceAndQualities(fewerBases, fewerQuals);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(fewerBases, record.Sequence());
+ EXPECT_EQ(fewerQuals, record.Qualities().Fastq());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that clearing sequence and qualities after tags have been stored
+// leaves the name and tags unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_ThenOverwriteWithEmptySeq)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string blank{""};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.Tags(tagSet);
+ record.SetSequenceAndQualities(blank, blank);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(blank, record.Sequence());
+ EXPECT_EQ(blank, record.Qualities().Fastq());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that replacing a two-tag collection with a three-tag collection
+// yields all three tags and leaves name, sequence and qualities unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_ThenOverwriteWithLongerTags)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+
+ // Initial collection: HX and CA only.
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+
+ // Replacement collection: same two tags plus XY.
+ TagCollection fullTagSet;
+ fullTagSet["HX"] = std::string{"1abc75"};
+ fullTagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ fullTagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ fullTagSet["XY"] = static_cast<int32_t>(-42);
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.Tags(tagSet);
+ record.Tags(fullTagSet);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY.
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that replacing a three-tag collection with a two-tag collection
+// yields only the two tags and leaves name, sequence and qualities unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_ThenOverwriteWithShorterTags)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+
+ // Initial collection: HX, CA and XY.
+ TagCollection fullTagSet;
+ fullTagSet["HX"] = std::string{"1abc75"};
+ fullTagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ fullTagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ fullTagSet["XY"] = static_cast<int32_t>(-42);
+
+ // Replacement collection: HX and CA only.
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.Tags(fullTagSet);
+ record.Tags(tagSet);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+
+ // Expected encoded text: only CA and HX remain, tab-separated.
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that replacing a populated tag collection with an empty one leaves
+// name, sequence and qualities intact and reports zero tags.
+TEST(BamRecordImplVariableDataTest, NameSeqQualTag_ThenOverwriteWithEmptyTags)
+{
+ const std::string readName = "foo";
+ const std::string sequence = "ACGTACGTACGT";
+ const std::string qualities = "?]?]?]?]?]?]";
+
+ TagCollection tags;
+ tags["HX"] = std::string("1abc75");
+ tags["HX"].Modifier(TagModifier::HEX_STRING);
+ tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+ tags["XY"] = static_cast<int32_t>(-42);
+
+ // Store the three tags, then wipe them with an empty collection.
+ BamRecordImpl bam;
+ bam.Name(readName);
+ bam.SetSequenceAndQualities(sequence, qualities);
+ bam.Tags(tags);
+ bam.Tags(TagCollection());
+
+ EXPECT_EQ(readName, bam.Name());
+ EXPECT_EQ(sequence, bam.Sequence());
+ EXPECT_EQ(qualities, bam.Qualities().Fastq());
+ // Unsigned literal: avoids comparing a signed int against the container's
+ // size inside EXPECT_EQ (assumes size() returns an unsigned type, as the
+ // standard containers do).
+ EXPECT_EQ(0u, bam.Tags().size());
+ tests::CheckRawData(bam);
+}
+
+// Checks that a record populated with name, sequence/qualities and a CIGAR
+// string returns each of them unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_Init_Normal)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string cigarText{"100="};
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// Checks that a record with an empty name but populated sequence/qualities
+// and CIGAR returns each of them unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_Init_EmptyName)
+{
+ const std::string name{""};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string cigarText{"100="};
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// Checks that a record with a name and CIGAR but empty sequence and qualities
+// returns each of them unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_Init_EmptySeqQual)
+{
+ const std::string name{"foo"};
+ const std::string bases{""};
+ const std::string quals{""};
+ const std::string cigarText{"100="};
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// Checks that a record with name, sequence and CIGAR but an empty quality
+// string returns each of them unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_Init_EmptyQual)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{""};
+ const std::string cigarText{"100="};
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// Checks that a record with name and sequence/qualities but an empty CIGAR
+// string returns each of them unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_Init_EmptyCigar)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string cigarText{""};
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// Checks that replacing the read name with a longer one, after sequence,
+// qualities and CIGAR are stored, leaves those later fields unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_ThenOverwriteWithLongerName)
+{
+ const std::string name{"foo"};
+ const std::string longName{"this is a long read name"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string cigarText{"100="};
+
+ // Fill everything with the short name, then grow the name last.
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+ record.Name(longName);
+
+ EXPECT_EQ(longName, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// Checks that replacing a long read name with a shorter one, after sequence,
+// qualities and CIGAR are stored, leaves those later fields unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_ThenOverwriteWithShorterName)
+{
+ const std::string longName{"this is a long read name"};
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string cigarText{"100="};
+
+ // Fill everything with the long name, then shrink the name last.
+ BamRecordImpl record;
+ record.Name(longName);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+ record.Name(name);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// Checks that overwriting the read name with an empty string, after sequence,
+// qualities and CIGAR are stored, leaves those later fields unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_ThenOverwriteWithEmptyName)
+{
+ const std::string name{"foo"};
+ const std::string blank{""};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string cigarText{"100="};
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+ record.Name(blank);
+
+ EXPECT_EQ(blank, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// Checks that growing the sequence/qualities after a CIGAR has been stored
+// yields the longer data and leaves the name and CIGAR unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_ThenOverwriteWithLongerSeq_NormalQual)
+{
+ const std::string name{"foo"};
+ const std::string fewerBases{"ACGT"};
+ const std::string fewerQuals{"?]?]"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string cigarText{"100="};
+
+ // Short seq/qual and CIGAR first, then the longer seq/qual.
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(fewerBases, fewerQuals);
+ record.CigarData(cigarText);
+ record.SetSequenceAndQualities(bases, quals);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// Checks that replacing a short seq/qual pair with a longer sequence and empty
+// qualities, after a CIGAR has been stored, leaves the name and CIGAR unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_ThenOverwriteWithLongerSeq_EmptyQual)
+{
+ const std::string name{"foo"};
+ const std::string fewerBases{"ACGT"};
+ const std::string fewerQuals{"?]?]"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{""};
+ const std::string cigarText{"100="};
+
+ // Short seq/qual and CIGAR first, then the longer sequence without quals.
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(fewerBases, fewerQuals);
+ record.CigarData(cigarText);
+ record.SetSequenceAndQualities(bases, quals);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// Checks that shrinking the sequence/qualities after a CIGAR has been stored
+// yields the shorter data and leaves the name and CIGAR unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_ThenOverwriteWithShorterSeq_NormalQual)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string fewerBases{"ACGT"};
+ const std::string fewerQuals{"?]?]"};
+ const std::string cigarText{"100="};
+
+ // Long seq/qual and CIGAR first, then the shorter seq/qual.
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+ record.SetSequenceAndQualities(fewerBases, fewerQuals);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(fewerBases, record.Sequence());
+ EXPECT_EQ(fewerQuals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// Checks that replacing a long seq/qual pair with a shorter sequence and empty
+// qualities, after a CIGAR has been stored, leaves the name and CIGAR unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_ThenOverwriteWithShorterSeq_EmptyQual)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string fewerBases{"ACGT"};
+ const std::string fewerQuals{""};
+ const std::string cigarText{"100="};
+
+ // Long seq/qual and CIGAR first, then the shorter sequence without quals.
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+ record.SetSequenceAndQualities(fewerBases, fewerQuals);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(fewerBases, record.Sequence());
+ EXPECT_EQ(fewerQuals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// Checks that clearing sequence and qualities after a CIGAR has been stored
+// leaves the name and CIGAR unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_ThenOverwriteWithEmptySeq)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string cigarText{"100="};
+ const std::string blank{""};
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+ record.SetSequenceAndQualities(blank, blank);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(blank, record.Sequence());
+ EXPECT_EQ(blank, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// Checks that replacing a one-operation CIGAR with a longer CIGAR string
+// yields the longer one and leaves name, sequence and qualities unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_ThenOverwriteWithLongerCigar)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string cigarText{"100="};
+ const std::string longCigarText{"100=10D100=10I100X"};
+
+ // Short CIGAR first, then grow it.
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+ record.CigarData(longCigarText);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(longCigarText, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// Checks that replacing a long CIGAR string with a one-operation CIGAR
+// yields the shorter one and leaves name, sequence and qualities unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_ThenOverwriteWithShorterCigar)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string longCigarText{"100=10D100=10I100X"};
+ const std::string cigarText{"100="};
+
+ // Long CIGAR first, then shrink it.
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(longCigarText);
+ record.CigarData(cigarText);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// Checks that overwriting the CIGAR with an empty string clears it and leaves
+// name, sequence and qualities unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigar_ThenOverwriteWithEmptyCigar)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string cigarText{"100="};
+ const std::string blank{""};
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+ record.CigarData(blank);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(blank, record.CigarData().ToStdString());
+ tests::CheckRawData(record);
+}
+
+// @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+// Checks that a record populated with name, sequence/qualities, CIGAR and
+// three tags returns each of them unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_Init_Normal)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string cigarText{"100="};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+ record.Tags(tagSet);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that a record with an empty name but populated sequence/qualities,
+// CIGAR and tags returns each of them unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_Init_EmptyName)
+{
+ const std::string name{""};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{"?]?]?]?]?]?]"};
+ const std::string cigarText{"100="};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+ record.Tags(tagSet);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that a record with name, CIGAR and tags but empty sequence and
+// qualities returns each of them unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_Init_EmptySeqQual)
+{
+ const std::string name{"foo"};
+ const std::string bases{""};
+ const std::string quals{""};
+ const std::string cigarText{"100="};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+ record.Tags(tagSet);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+// Checks that a record with name, sequence, CIGAR and tags but an empty
+// quality string returns each of them unchanged.
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_Init_EmptyQual)
+{
+ const std::string name{"foo"};
+ const std::string bases{"ACGTACGTACGT"};
+ const std::string quals{""};
+ const std::string cigarText{"100="};
+
+ TagCollection tagSet;
+ tagSet["HX"] = std::string{"1abc75"};
+ tagSet["HX"].Modifier(TagModifier::HEX_STRING);
+ tagSet["CA"] = std::vector<uint8_t>{34, 5, 125};
+ tagSet["XY"] = static_cast<int32_t>(-42);
+
+ BamRecordImpl record;
+ record.Name(name);
+ record.SetSequenceAndQualities(bases, quals);
+ record.CigarData(cigarText);
+ record.Tags(tagSet);
+
+ EXPECT_EQ(name, record.Name());
+ EXPECT_EQ(bases, record.Sequence());
+ EXPECT_EQ(quals, record.Qualities().Fastq());
+ EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+ // Expected encoded text: tab-separated, listed CA, HX, XY (not the
+ // insertion order used above).
+ const std::string expectedSam =
+ "CA:B:C,34,5,125" "\t"
+ "HX:H:1abc75" "\t"
+ "XY:i:-42";
+
+ const std::string encodedTags = SamTagCodec::Encode(record.Tags());
+ EXPECT_EQ(expectedSam, encodedTags);
+
+ tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_Init_EmptyCigar)
+{
+    // Record with name, sequence, qualities and tags, but an empty CIGAR.
+    const std::string name      = "foo";
+    const std::string bases     = "ACGTACGTACGT";
+    const std::string quals     = "?]?]?]?]?]?]";
+    const std::string cigarText = "";
+
+    TagCollection tagData;
+    tagData["HX"] = std::string("1abc75");
+    tagData["HX"].Modifier(TagModifier::HEX_STRING);
+    tagData["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tagData["XY"] = static_cast<int32_t>(-42);
+
+    // Fill the record in the order: name, seq/qual, CIGAR, tags.
+    BamRecordImpl record;
+    record.Name(name);
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(tagData);
+
+    // Each field must read back exactly as it was stored.
+    EXPECT_EQ(name,      record.Name());
+    EXPECT_EQ(bases,     record.Sequence());
+    EXPECT_EQ(quals,     record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tags are compared through their SamTagCodec text encoding.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_Init_EmptyTag)
+{
+    // Record with name, sequence, qualities and CIGAR, initialized with an
+    // empty tag collection.
+    const std::string readName  = "foo";
+    const std::string sequence  = "ACGTACGTACGT";
+    const std::string qualities = "?]?]?]?]?]?]";
+    const std::string cigar     = "100=";
+
+    // Fill the record in the order: name, seq/qual, CIGAR, (empty) tags.
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.SetSequenceAndQualities(sequence, qualities);
+    bam.CigarData(cigar);
+    bam.Tags(TagCollection());
+
+    EXPECT_EQ(readName,  bam.Name());
+    EXPECT_EQ(sequence,  bam.Sequence());
+    EXPECT_EQ(qualities, bam.Qualities().Fastq());
+    EXPECT_EQ(cigar,     bam.CigarData().ToStdString());
+
+    // Unsigned literal: avoids comparing a signed 0 against size() inside
+    // EXPECT_EQ (assumes size() returns an unsigned type, as standard
+    // containers do).
+    EXPECT_EQ(0u, bam.Tags().size());
+    tests::CheckRawData(bam);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_ThenOverwriteWithLongerName)
+{
+    // Fully populate a record, then replace its name with a longer one.
+    const std::string shortName = "foo";
+    const std::string bases     = "ACGTACGTACGT";
+    const std::string quals     = "?]?]?]?]?]?]";
+    const std::string cigarText = "100=";
+    const std::string longName  = "this is a long read name";
+
+    TagCollection tagData;
+    tagData["HX"] = std::string("1abc75");
+    tagData["HX"].Modifier(TagModifier::HEX_STRING);
+    tagData["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tagData["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Name(shortName);
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(tagData);
+    record.Name(longName);   // overwrite: the name grows
+
+    // The new name is visible and the other fields are unchanged.
+    EXPECT_EQ(longName,  record.Name());
+    EXPECT_EQ(bases,     record.Sequence());
+    EXPECT_EQ(quals,     record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tags are compared through their SamTagCodec text encoding.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_ThenOverwriteWithShorterName)
+{
+    // Fully populate a record under a long name, then replace it with a
+    // shorter one.
+    const std::string shortName = "foo";
+    const std::string bases     = "ACGTACGTACGT";
+    const std::string quals     = "?]?]?]?]?]?]";
+    const std::string cigarText = "100=";
+    const std::string longName  = "this is a long read name";
+
+    TagCollection tagData;
+    tagData["HX"] = std::string("1abc75");
+    tagData["HX"].Modifier(TagModifier::HEX_STRING);
+    tagData["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tagData["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Name(longName);
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(tagData);
+    record.Name(shortName);   // overwrite: the name shrinks
+
+    // The new name is visible and the other fields are unchanged.
+    EXPECT_EQ(shortName, record.Name());
+    EXPECT_EQ(bases,     record.Sequence());
+    EXPECT_EQ(quals,     record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tags are compared through their SamTagCodec text encoding.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_ThenOverwriteWithEmptyName)
+{
+    // Fully populate a record, then replace its name with an empty string.
+    const std::string name      = "foo";
+    const std::string bases     = "ACGTACGTACGT";
+    const std::string quals     = "?]?]?]?]?]?]";
+    const std::string cigarText = "100=";
+    const std::string blank     = "";
+
+    TagCollection tagData;
+    tagData["HX"] = std::string("1abc75");
+    tagData["HX"].Modifier(TagModifier::HEX_STRING);
+    tagData["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tagData["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Name(name);
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(tagData);
+    record.Name(blank);   // overwrite: the name becomes empty
+
+    // The name reads back empty and the other fields are unchanged.
+    EXPECT_EQ(blank,     record.Name());
+    EXPECT_EQ(bases,     record.Sequence());
+    EXPECT_EQ(quals,     record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tags are compared through their SamTagCodec text encoding.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_ThenOverwriteWithLongerSeq_NormalQual)
+{
+    // Populate a record with a short seq/qual pair, then overwrite it with a
+    // longer sequence and matching qualities.
+    const std::string name       = "foo";
+    const std::string longBases  = "ACGTACGTACGT";
+    const std::string longQuals  = "?]?]?]?]?]?]";
+    const std::string cigarText  = "100=";
+    const std::string shortBases = "ACGT";
+    const std::string shortQuals = "?]?]";
+
+    TagCollection tagData;
+    tagData["HX"] = std::string("1abc75");
+    tagData["HX"].Modifier(TagModifier::HEX_STRING);
+    tagData["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tagData["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Name(name);
+    record.SetSequenceAndQualities(shortBases, shortQuals);
+    record.CigarData(cigarText);
+    record.Tags(tagData);
+    record.SetSequenceAndQualities(longBases, longQuals);   // overwrite: seq/qual grow
+
+    // The longer seq/qual are visible and the other fields are unchanged.
+    EXPECT_EQ(name,      record.Name());
+    EXPECT_EQ(longBases, record.Sequence());
+    EXPECT_EQ(longQuals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tags are compared through their SamTagCodec text encoding.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_ThenOverwriteWithLongerSeq_EmptyQual)
+{
+    // Populate a record with a short seq/qual pair, then overwrite it with a
+    // longer sequence and an empty quality string.
+    const std::string name       = "foo";
+    const std::string longBases  = "ACGTACGTACGT";
+    const std::string noQuals    = "";
+    const std::string cigarText  = "100=";
+    const std::string shortBases = "ACGT";
+    const std::string shortQuals = "?]?]";
+
+    TagCollection tagData;
+    tagData["HX"] = std::string("1abc75");
+    tagData["HX"].Modifier(TagModifier::HEX_STRING);
+    tagData["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tagData["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Name(name);
+    record.SetSequenceAndQualities(shortBases, shortQuals);
+    record.CigarData(cigarText);
+    record.Tags(tagData);
+    record.SetSequenceAndQualities(longBases, noQuals);   // overwrite: longer seq, no quals
+
+    // The new seq/qual are visible and the other fields are unchanged.
+    EXPECT_EQ(name,      record.Name());
+    EXPECT_EQ(longBases, record.Sequence());
+    EXPECT_EQ(noQuals,   record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tags are compared through their SamTagCodec text encoding.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_ThenOverwriteWithShorterSeq_NormalQual)
+{
+    // Populate a record with a long seq/qual pair, then overwrite it with a
+    // shorter sequence and matching qualities.
+    const std::string name       = "foo";
+    const std::string longBases  = "ACGTACGTACGT";
+    const std::string longQuals  = "?]?]?]?]?]?]";
+    const std::string cigarText  = "100=";
+    const std::string shortBases = "ACGT";
+    const std::string shortQuals = "?]?]";
+
+    TagCollection tagData;
+    tagData["HX"] = std::string("1abc75");
+    tagData["HX"].Modifier(TagModifier::HEX_STRING);
+    tagData["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tagData["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Name(name);
+    record.SetSequenceAndQualities(longBases, longQuals);
+    record.CigarData(cigarText);
+    record.Tags(tagData);
+    record.SetSequenceAndQualities(shortBases, shortQuals);   // overwrite: seq/qual shrink
+
+    // The shorter seq/qual are visible and the other fields are unchanged.
+    EXPECT_EQ(name,       record.Name());
+    EXPECT_EQ(shortBases, record.Sequence());
+    EXPECT_EQ(shortQuals, record.Qualities().Fastq());
+    EXPECT_EQ(cigarText,  record.CigarData().ToStdString());
+
+    // Tags are compared through their SamTagCodec text encoding.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_ThenOverwriteWithShorterSeq_EmptyQual)
+{
+    // Populate a record with a long seq/qual pair, then overwrite it with a
+    // shorter sequence and an empty quality string.
+    const std::string name       = "foo";
+    const std::string longBases  = "ACGTACGTACGT";
+    const std::string longQuals  = "?]?]?]?]?]?]";
+    const std::string cigarText  = "100=";
+    const std::string shortBases = "ACGT";
+    const std::string noQuals    = "";
+
+    TagCollection tagData;
+    tagData["HX"] = std::string("1abc75");
+    tagData["HX"].Modifier(TagModifier::HEX_STRING);
+    tagData["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tagData["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Name(name);
+    record.SetSequenceAndQualities(longBases, longQuals);
+    record.CigarData(cigarText);
+    record.Tags(tagData);
+    record.SetSequenceAndQualities(shortBases, noQuals);   // overwrite: shorter seq, no quals
+
+    // The new seq/qual are visible and the other fields are unchanged.
+    EXPECT_EQ(name,       record.Name());
+    EXPECT_EQ(shortBases, record.Sequence());
+    EXPECT_EQ(noQuals,    record.Qualities().Fastq());
+    EXPECT_EQ(cigarText,  record.CigarData().ToStdString());
+
+    // Tags are compared through their SamTagCodec text encoding.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_ThenOverwriteWithEmptySeq)
+{
+    // Fully populate a record, then overwrite sequence and qualities with
+    // empty strings.
+    const std::string name      = "foo";
+    const std::string bases     = "ACGTACGTACGT";
+    const std::string quals     = "?]?]?]?]?]?]";
+    const std::string cigarText = "100=";
+    const std::string blank     = "";
+
+    TagCollection tagData;
+    tagData["HX"] = std::string("1abc75");
+    tagData["HX"].Modifier(TagModifier::HEX_STRING);
+    tagData["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tagData["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Name(name);
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(tagData);
+    record.SetSequenceAndQualities(blank, blank);   // overwrite: seq/qual become empty
+
+    // Seq/qual read back empty and the other fields are unchanged.
+    EXPECT_EQ(name,      record.Name());
+    EXPECT_EQ(blank,     record.Sequence());
+    EXPECT_EQ(blank,     record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Tags are compared through their SamTagCodec text encoding.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_ThenOverwriteWithLongerCigar)
+{
+    // Fully populate a record, then replace its CIGAR with a longer one.
+    const std::string name       = "foo";
+    const std::string bases      = "ACGTACGTACGT";
+    const std::string quals      = "?]?]?]?]?]?]";
+    const std::string shortCigar = "100=";
+    const std::string longCigar  = "100=10D100=10I100X";
+
+    TagCollection tagData;
+    tagData["HX"] = std::string("1abc75");
+    tagData["HX"].Modifier(TagModifier::HEX_STRING);
+    tagData["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tagData["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Name(name);
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(shortCigar);
+    record.Tags(tagData);
+    record.CigarData(longCigar);   // overwrite: the CIGAR grows
+
+    // The new CIGAR is visible and the other fields are unchanged.
+    EXPECT_EQ(name,      record.Name());
+    EXPECT_EQ(bases,     record.Sequence());
+    EXPECT_EQ(quals,     record.Qualities().Fastq());
+    EXPECT_EQ(longCigar, record.CigarData().ToStdString());
+
+    // Tags are compared through their SamTagCodec text encoding.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_ThenOverwriteWithShorterCigar)
+{
+    // Fully populate a record with a long CIGAR, then replace it with a
+    // shorter one.
+    const std::string name       = "foo";
+    const std::string bases      = "ACGTACGTACGT";
+    const std::string quals      = "?]?]?]?]?]?]";
+    const std::string shortCigar = "100=";
+    const std::string longCigar  = "100=10D100=10I100X";
+
+    TagCollection tagData;
+    tagData["HX"] = std::string("1abc75");
+    tagData["HX"].Modifier(TagModifier::HEX_STRING);
+    tagData["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tagData["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Name(name);
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(longCigar);
+    record.Tags(tagData);
+    record.CigarData(shortCigar);   // overwrite: the CIGAR shrinks
+
+    // The new CIGAR is visible and the other fields are unchanged.
+    EXPECT_EQ(name,       record.Name());
+    EXPECT_EQ(bases,      record.Sequence());
+    EXPECT_EQ(quals,      record.Qualities().Fastq());
+    EXPECT_EQ(shortCigar, record.CigarData().ToStdString());
+
+    // Tags are compared through their SamTagCodec text encoding.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_ThenOverwriteWithEmptyCigar)
+{
+    // Fully populate a record, then replace its CIGAR with an empty string.
+    const std::string name      = "foo";
+    const std::string bases     = "ACGTACGTACGT";
+    const std::string quals     = "?]?]?]?]?]?]";
+    const std::string cigarText = "100=";
+    const std::string blank     = "";
+
+    TagCollection tagData;
+    tagData["HX"] = std::string("1abc75");
+    tagData["HX"].Modifier(TagModifier::HEX_STRING);
+    tagData["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tagData["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl record;
+    record.Name(name);
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(tagData);
+    record.CigarData(blank);   // overwrite: the CIGAR becomes empty
+
+    // The CIGAR reads back empty and the other fields are unchanged.
+    EXPECT_EQ(name,  record.Name());
+    EXPECT_EQ(bases, record.Sequence());
+    EXPECT_EQ(quals, record.Qualities().Fastq());
+    EXPECT_EQ(blank, record.CigarData().ToStdString());
+
+    // Tags are compared through their SamTagCodec text encoding.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_ThenOverwriteWithLongerTags)
+{
+    // Fully populate a record with two tags, then replace them with a
+    // three-tag collection.
+    const std::string name      = "foo";
+    const std::string bases     = "ACGTACGTACGT";
+    const std::string quals     = "?]?]?]?]?]?]";
+    const std::string cigarText = "100=";
+
+    // Replacement collection: HX, CA and XY.
+    TagCollection threeTags;
+    threeTags["HX"] = std::string("1abc75");
+    threeTags["HX"].Modifier(TagModifier::HEX_STRING);
+    threeTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    threeTags["XY"] = static_cast<int32_t>(-42);
+
+    // Initial collection: HX and CA only.
+    TagCollection twoTags;
+    twoTags["HX"] = std::string("1abc75");
+    twoTags["HX"].Modifier(TagModifier::HEX_STRING);
+    twoTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+
+    BamRecordImpl record;
+    record.Name(name);
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(twoTags);
+    record.Tags(threeTags);   // overwrite: the tag data grows
+
+    // The other fields are unchanged.
+    EXPECT_EQ(name,      record.Name());
+    EXPECT_EQ(bases,     record.Sequence());
+    EXPECT_EQ(quals,     record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // All three tags of the replacement collection are expected.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75" "\t"
+        "XY:i:-42";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_ThenOverwriteWithShorterTags)
+{
+    // Fully populate a record with three tags, then replace them with a
+    // two-tag collection.
+    const std::string name      = "foo";
+    const std::string bases     = "ACGTACGTACGT";
+    const std::string quals     = "?]?]?]?]?]?]";
+    const std::string cigarText = "100=";
+
+    // Initial collection: HX, CA and XY.
+    TagCollection threeTags;
+    threeTags["HX"] = std::string("1abc75");
+    threeTags["HX"].Modifier(TagModifier::HEX_STRING);
+    threeTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    threeTags["XY"] = static_cast<int32_t>(-42);
+
+    // Replacement collection: HX and CA only.
+    TagCollection twoTags;
+    twoTags["HX"] = std::string("1abc75");
+    twoTags["HX"].Modifier(TagModifier::HEX_STRING);
+    twoTags["CA"] = std::vector<uint8_t>({34, 5, 125});
+
+    BamRecordImpl record;
+    record.Name(name);
+    record.SetSequenceAndQualities(bases, quals);
+    record.CigarData(cigarText);
+    record.Tags(threeTags);
+    record.Tags(twoTags);   // overwrite: the tag data shrinks
+
+    // The other fields are unchanged.
+    EXPECT_EQ(name,      record.Name());
+    EXPECT_EQ(bases,     record.Sequence());
+    EXPECT_EQ(quals,     record.Qualities().Fastq());
+    EXPECT_EQ(cigarText, record.CigarData().ToStdString());
+
+    // Only the two tags of the replacement collection are expected.
+    const std::string expectedSam =
+        "CA:B:C,34,5,125" "\t"
+        "HX:H:1abc75";
+    EXPECT_EQ(expectedSam, SamTagCodec::Encode(record.Tags()));
+
+    tests::CheckRawData(record);
+}
+
+TEST(BamRecordImplVariableDataTest, NameSeqQualCigarTag_ThenOverwriteWithEmptyTags)
+{
+    // Fully populate a record (including three tags), then replace the tags
+    // with an empty collection.
+    const std::string readName  = "foo";
+    const std::string sequence  = "ACGTACGTACGT";
+    const std::string qualities = "?]?]?]?]?]?]";
+    const std::string cigar     = "100=";
+
+    TagCollection tags;
+    tags["HX"] = std::string("1abc75");
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+    tags["CA"] = std::vector<uint8_t>({34, 5, 125});
+    tags["XY"] = static_cast<int32_t>(-42);
+
+    BamRecordImpl bam;
+    bam.Name(readName);
+    bam.SetSequenceAndQualities(sequence, qualities);
+    bam.CigarData(cigar);
+    bam.Tags(tags);
+    bam.Tags(TagCollection());   // overwrite: the tag data becomes empty
+
+    // The other fields are unchanged.
+    EXPECT_EQ(readName,  bam.Name());
+    EXPECT_EQ(sequence,  bam.Sequence());
+    EXPECT_EQ(qualities, bam.Qualities().Fastq());
+    EXPECT_EQ(cigar,     bam.CigarData().ToStdString());
+
+    // Unsigned literal: avoids comparing a signed 0 against size() inside
+    // EXPECT_EQ (assumes size() returns an unsigned type, as standard
+    // containers do).
+    EXPECT_EQ(0u, bam.Tags().size());
+    tests::CheckRawData(bam);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/BamRecord.h>
+#include <pbbam/BamRecordView.h>
+#include <pbbam/BamTagCodec.h>
+#include <chrono>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Container type used for the per-record frame values in these tests.
+using f_data = vector<uint16_t>;
+
+namespace tests {
+
+// Builds a BamRecord from the given sequence/qualities and a fixed set of tags:
+//   qs / qe                       <- qStart / qEnd
+//   ip, pw                        <- frames
+//   dt, st                        <- tagBases
+//   dq, iq, mq, sq, pq, pv        <- tagQuals
+static
+BamRecord MakeRecord(const Position qStart,
+                     const Position qEnd,
+                     const string& seq,
+                     const string& quals,
+                     const string& tagBases,
+                     const string& tagQuals,
+                     const f_data& frames)
+{
+    BamRecordImpl impl;
+    impl.SetSequenceAndQualities(seq, quals);
+
+    TagCollection tags;
+    tags["qs"] = qStart;
+    tags["qe"] = qEnd;
+
+    // Tags are assigned in the same order as before: frames, bases, quals.
+    for (const char* frameKey : { "ip", "pw" })
+        tags[frameKey] = frames;
+    for (const char* baseKey : { "dt", "st" })
+        tags[baseKey] = tagBases;
+    for (const char* qualKey : { "dq", "iq", "mq", "sq", "pq", "pv" })
+        tags[qualKey] = tagQuals;
+
+    impl.Tags(tags);
+
+    return BamRecord(std::move(impl));
+}
+
+} // namespace tests
+
+TEST(BamRecordMappingTest, BasicMap)
+{
+    // Maps identical records with three CIGARs (match only, with a deletion,
+    // with insertion + deletions) on both strands, then checks:
+    //   1. the mapping accessors (reference id, strand, map quality, query /
+    //      aligned / reference coordinates), and
+    //   2. per-base data as seen through BamRecordView in native orientation
+    //      and, for reverse-strand records, in genomic orientation.
+    // The shared assertion groups live in two local helpers; SCOPED_TRACE
+    // labels each case so a failure still identifies the record involved.
+
+    // Input data, as stored in the record.
+    const Position qStart  = 500;
+    const Position qEnd    = 510;
+    const string seq       = "AACCGTTAGC";
+    const string quals     = "?]?]?]?]?*";
+    const string tagBases  = "AACCGTTAGC";
+    const string tagQuals  = "?]?]?]?]?*";
+    const f_data frames    = { 10, 10, 20, 20, 30, 40, 40, 10, 30, 20 };
+    const uint8_t mapQual  = 80;
+
+    // Expected data in genomic orientation for reverse-strand records.
+    const string seq_rev       = "GCTAACGGTT";
+    const string quals_rev     = "*?]?]?]?]?";
+    const string tagBases_rev  = seq_rev;
+    const string tagQuals_rev  = quals_rev;
+    const f_data frames_rev    = { 20, 30, 10, 40, 40, 30, 20, 20, 10, 10 };
+
+    const string s1_cigar = "10=";
+    const string s2_cigar = "5=3D5=";
+    const string s3_cigar = "4=1D2I2D4=";
+
+    BamRecord s1 = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+    BamRecord s2 = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+    BamRecord s3 = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+    BamRecord s1_rev = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+    BamRecord s2_rev = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+    BamRecord s3_rev = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+
+    s1.Map(0, 100, Strand::FORWARD, s1_cigar, mapQual);
+    s2.Map(0, 100, Strand::FORWARD, s2_cigar, mapQual);
+    s3.Map(0, 100, Strand::FORWARD, s3_cigar, mapQual);
+    s1_rev.Map(0, 100, Strand::REVERSE, s1_cigar, mapQual);
+    s2_rev.Map(0, 100, Strand::REVERSE, s2_cigar, mapQual);
+    s3_rev.Map(0, 100, Strand::REVERSE, s3_cigar, mapQual);
+
+    // Checks the mapping accessors of one record. Only the strand and the
+    // reference end vary between cases; all other expectations are shared.
+    const auto checkMapping = [&](BamRecord& record,
+                                  const Strand strand,
+                                  const Position refEnd)
+    {
+        EXPECT_TRUE(record.IsMapped());
+        EXPECT_EQ(0, record.ReferenceId());
+        EXPECT_EQ(strand, record.AlignedStrand());
+        EXPECT_EQ(mapQual, record.MapQuality());
+
+        EXPECT_EQ(qStart, record.QueryStart());
+        EXPECT_EQ(qEnd, record.QueryEnd());
+        EXPECT_EQ(500, record.AlignedStart());
+        EXPECT_EQ(510, record.AlignedEnd());
+        EXPECT_EQ(100, record.ReferenceStart());
+        EXPECT_EQ(refEnd, record.ReferenceEnd());
+    };
+
+    // Builds a BamRecordView over the record in the requested orientation
+    // (remaining view options exactly as in each original case: false, false,
+    // PulseBehavior::ALL) and compares the per-base data to the expectations.
+    const auto checkView = [](BamRecord& record,
+                              const Orientation orientation,
+                              const string& expectedSeq,
+                              const string& expectedQuals,
+                              const string& expectedTagBases,
+                              const string& expectedTagQuals,
+                              const f_data& expectedFrames)
+    {
+        const BamRecordView view
+        {
+            record,
+            orientation,
+            false,
+            false,
+            PulseBehavior::ALL
+        };
+
+        EXPECT_EQ(expectedSeq,      view.Sequence());
+        EXPECT_EQ(expectedQuals,    view.Qualities().Fastq());
+        EXPECT_EQ(expectedTagBases, view.DeletionTags());
+        EXPECT_EQ(expectedTagQuals, view.DeletionQVs().Fastq());
+        EXPECT_EQ(expectedTagQuals, view.LabelQVs().Fastq());
+        EXPECT_EQ(expectedTagQuals, view.AltLabelQVs().Fastq());
+        EXPECT_EQ(expectedFrames,   view.IPD().Data());
+    };
+
+    {
+        SCOPED_TRACE("s1 - FORWARD");
+        checkMapping(s1, Strand::FORWARD, 110);   // 100 + 10=
+        checkView(s1, Orientation::NATIVE, seq, quals, tagBases, tagQuals, frames);
+    }
+
+    {
+        SCOPED_TRACE("s1 - REVERSE");
+        checkMapping(s1_rev, Strand::REVERSE, 110);   // 100 + 10=
+        checkView(s1_rev, Orientation::NATIVE, seq, quals, tagBases, tagQuals, frames);
+        checkView(s1_rev, Orientation::GENOMIC,
+                  seq_rev, quals_rev, tagBases_rev, tagQuals_rev, frames_rev);
+    }
+
+    {
+        SCOPED_TRACE("s2 - FORWARD");
+        checkMapping(s2, Strand::FORWARD, 113);   // 100 + 10= + 3D
+        checkView(s2, Orientation::NATIVE, seq, quals, tagBases, tagQuals, frames);
+    }
+
+    {
+        SCOPED_TRACE("s2 - REVERSE");
+        checkMapping(s2_rev, Strand::REVERSE, 113);   // 100 + 10= + 3D
+        checkView(s2_rev, Orientation::NATIVE, seq, quals, tagBases, tagQuals, frames);
+        checkView(s2_rev, Orientation::GENOMIC,
+                  seq_rev, quals_rev, tagBases_rev, tagQuals_rev, frames_rev);
+    }
+
+    {
+        SCOPED_TRACE("s3 - FORWARD");
+        checkMapping(s3, Strand::FORWARD, 111);   // 100 + 8= + 3D (aligned end: 500 + 8= + 2I)
+        checkView(s3, Orientation::NATIVE, seq, quals, tagBases, tagQuals, frames);
+    }
+
+    {
+        SCOPED_TRACE("s3 - REVERSE");
+        checkMapping(s3_rev, Strand::REVERSE, 111);   // 100 + 8= + 3D (aligned end: 500 + 8= + 2I)
+        checkView(s3_rev, Orientation::NATIVE, seq, quals, tagBases, tagQuals, frames);
+        checkView(s3_rev, Orientation::GENOMIC,
+                  seq_rev, quals_rev, tagBases_rev, tagQuals_rev, frames_rev);
+    }
+}
+
+TEST(BamRecordMappingTest, SoftClipMapping)
+{
+ const Position qStart = 500;
+ const Position qEnd = 515;
+ const string seq = "TTAACCGTTAGCAAA";
+ const string quals = "--?]?]?]?]?*+++";
+ const string tagBases = "TTAACCGTTAGCAAA";
+ const string tagQuals = "--?]?]?]?]?*+++";
+ const f_data frames = { 40, 40, 10, 10, 20, 20, 30, 40, 40, 10, 30, 20, 10, 10, 10 };
+ const uint8_t mapQual = 80;
+
+ const string clipped_seq = "AACCGTTAGC";
+ const string clipped_quals = "?]?]?]?]?*";
+ const string clipped_tagBases = "AACCGTTAGC";
+ const string clipped_tagQuals = "?]?]?]?]?*";
+ const f_data clipped_frames = { 10, 10, 20, 20, 30, 40, 40, 10, 30, 20 };
+
+ const string seq_rev = "TTTGCTAACGGTTAA";
+ const string quals_rev = "+++*?]?]?]?]?--";
+ const string tagBases_rev = seq_rev;
+ const string tagQuals_rev = quals_rev;
+ const f_data frames_rev = { 10, 10, 10, 20, 30, 10, 40, 40, 30, 20, 20, 10, 10, 40, 40 };
+
+ const string clipped_seq_rev = "GCTAACGGTT";
+ const string clipped_quals_rev = "*?]?]?]?]?";
+ const string clipped_tagBases_rev = clipped_seq_rev;
+ const string clipped_tagQuals_rev = clipped_quals_rev;
+ const f_data clipped_frames_rev = { 20, 30, 10, 40, 40, 30, 20, 20, 10, 10 };
+
+ const string s1_cigar = "2S10=3S";
+ const string s2_cigar = "2S5=3D5=3S";
+ const string s3_cigar = "2S4=1D2I2D4=3S";
+
+ BamRecord s1 = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+ BamRecord s2 = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+ BamRecord s3 = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+ BamRecord s1_rev = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+ BamRecord s2_rev = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+ BamRecord s3_rev = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+
+ s1.Map(0, 100, Strand::FORWARD, s1_cigar, mapQual);
+ s2.Map(0, 100, Strand::FORWARD, s2_cigar, mapQual);
+ s3.Map(0, 100, Strand::FORWARD, s3_cigar, mapQual);
+ s1_rev.Map(0, 100, Strand::REVERSE, s1_cigar, mapQual);
+ s2_rev.Map(0, 100, Strand::REVERSE, s2_cigar, mapQual);
+ s3_rev.Map(0, 100, Strand::REVERSE, s3_cigar, mapQual);
+
+ { // s1 - FORWARD
+
+ EXPECT_TRUE(s1.IsMapped());
+ EXPECT_EQ(0, s1.ReferenceId());
+ EXPECT_EQ(Strand::FORWARD, s1.AlignedStrand());
+ EXPECT_EQ(mapQual, s1.MapQuality());
+
+ EXPECT_EQ(qStart, s1.QueryStart()); // 500
+ EXPECT_EQ(qEnd, s1.QueryEnd()); // QStart + seqLength
+ EXPECT_EQ(502, s1.AlignedStart()); // QStart + 2S
+ EXPECT_EQ(512, s1.AlignedEnd()); // AStart + 10=
+ EXPECT_EQ(100, s1.ReferenceStart()); // 100
+ EXPECT_EQ(110, s1.ReferenceEnd()); // RefStart + 10=
+
+ const BamRecordView view
+ {
+ s1,
+ Orientation::NATIVE,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(seq, view.Sequence());
+ EXPECT_EQ(quals, view.Qualities().Fastq());
+ EXPECT_EQ(tagBases, view.DeletionTags());
+ EXPECT_EQ(tagQuals, view.DeletionQVs().Fastq());
+ EXPECT_EQ(tagQuals, view.LabelQVs().Fastq());
+ EXPECT_EQ(tagQuals, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames, view.IPD().Data());
+ }
+
+ { // s1 - REVERSE
+
+ EXPECT_TRUE(s1_rev.IsMapped());
+ EXPECT_EQ(0, s1_rev.ReferenceId());
+ EXPECT_EQ(Strand::REVERSE, s1_rev.AlignedStrand());
+ EXPECT_EQ(mapQual, s1_rev.MapQuality());
+
+ EXPECT_EQ(qStart, s1_rev.QueryStart()); // 500
+ EXPECT_EQ(qEnd, s1_rev.QueryEnd()); // QStart + seqLength
+ EXPECT_EQ(503, s1_rev.AlignedStart()); // QStart + 3S
+ EXPECT_EQ(513, s1_rev.AlignedEnd()); // AStart + 10=
+ EXPECT_EQ(100, s1_rev.ReferenceStart()); // 100
+ EXPECT_EQ(110, s1_rev.ReferenceEnd()); // RefStart + 10=
+
+ // - native
+ const BamRecordView nativeView
+ {
+ s1_rev,
+ Orientation::NATIVE,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+ EXPECT_EQ(seq, nativeView.Sequence());
+ EXPECT_EQ(quals, nativeView.Qualities().Fastq());
+ EXPECT_EQ(tagBases, nativeView.DeletionTags());
+ EXPECT_EQ(tagQuals, nativeView.DeletionQVs().Fastq());
+ EXPECT_EQ(tagQuals, nativeView.LabelQVs().Fastq());
+ EXPECT_EQ(tagQuals, nativeView.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames, nativeView.IPD().Data());
+
+ // - genomic
+ const BamRecordView genomicView
+ {
+ s1_rev,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+ EXPECT_EQ(seq_rev, genomicView.Sequence());
+ EXPECT_EQ(quals_rev, genomicView.Qualities().Fastq());
+ EXPECT_EQ(tagBases_rev, genomicView.DeletionTags());
+ EXPECT_EQ(tagQuals_rev, genomicView.DeletionQVs().Fastq());
+ EXPECT_EQ(tagQuals_rev, genomicView.LabelQVs().Fastq());
+ EXPECT_EQ(tagQuals_rev, genomicView.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames_rev, genomicView.IPD().Data());
+ }
+
+ { // s2 - FORWARD
+
+ EXPECT_TRUE(s2.IsMapped());
+ EXPECT_EQ(0, s2.ReferenceId());
+ EXPECT_EQ(Strand::FORWARD, s2.AlignedStrand());
+ EXPECT_EQ(mapQual, s2.MapQuality());
+
+ EXPECT_EQ(qStart, s2.QueryStart()); // 500
+ EXPECT_EQ(qEnd, s2.QueryEnd()); // QStart + seqLength
+ EXPECT_EQ(502, s2.AlignedStart()); // QStart + 2S
+ EXPECT_EQ(512, s2.AlignedEnd()); // AStart + 10=
+ EXPECT_EQ(100, s2.ReferenceStart()); // 100
+ EXPECT_EQ(113, s2.ReferenceEnd()); // RefStart + 10= + 3D
+
+ const BamRecordView view
+ {
+ s2,
+ Orientation::NATIVE,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(seq, view.Sequence());
+ EXPECT_EQ(quals, view.Qualities().Fastq());
+ EXPECT_EQ(tagBases, view.DeletionTags());
+ EXPECT_EQ(tagQuals, view.DeletionQVs().Fastq());
+ EXPECT_EQ(tagQuals, view.LabelQVs().Fastq());
+ EXPECT_EQ(tagQuals, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames, view.IPD().Data());
+ }
+
+ { // s2 - REVERSE
+
+ EXPECT_TRUE(s2_rev.IsMapped());
+ EXPECT_EQ(0, s2_rev.ReferenceId());
+ EXPECT_EQ(Strand::REVERSE, s2_rev.AlignedStrand());
+ EXPECT_EQ(mapQual, s2_rev.MapQuality());
+
+ EXPECT_EQ(qStart, s2_rev.QueryStart()); // 500
+ EXPECT_EQ(qEnd, s2_rev.QueryEnd()); // QStart + seqLength
+ EXPECT_EQ(503, s2_rev.AlignedStart()); // QStart + 3S
+ EXPECT_EQ(513, s2_rev.AlignedEnd()); // AStart + 10=
+ EXPECT_EQ(100, s2_rev.ReferenceStart()); // 100
+ EXPECT_EQ(113, s2_rev.ReferenceEnd()); // RefStart + 10= + 3D
+
+ // - native
+ const BamRecordView nativeView
+ {
+ s2_rev,
+ Orientation::NATIVE,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+ EXPECT_EQ(seq, nativeView.Sequence());
+ EXPECT_EQ(quals, nativeView.Qualities().Fastq());
+ EXPECT_EQ(tagBases, nativeView.DeletionTags());
+ EXPECT_EQ(tagQuals, nativeView.DeletionQVs().Fastq());
+ EXPECT_EQ(tagQuals, nativeView.LabelQVs().Fastq());
+ EXPECT_EQ(tagQuals, nativeView.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames, nativeView.IPD().Data());
+
+ // - genomic
+ const BamRecordView genomicView
+ {
+ s2_rev,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+ EXPECT_EQ(seq_rev, genomicView.Sequence());
+ EXPECT_EQ(quals_rev, genomicView.Qualities().Fastq());
+ EXPECT_EQ(tagBases_rev, genomicView.DeletionTags());
+ EXPECT_EQ(tagQuals_rev, genomicView.DeletionQVs().Fastq());
+ EXPECT_EQ(tagQuals_rev, genomicView.LabelQVs().Fastq());
+ EXPECT_EQ(tagQuals_rev, genomicView.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames_rev, genomicView.IPD().Data());
+ }
+
+ { // s3 - FORWARD
+
+ EXPECT_TRUE(s3.IsMapped());
+ EXPECT_EQ(0, s3.ReferenceId());
+ EXPECT_EQ(Strand::FORWARD, s3.AlignedStrand());
+ EXPECT_EQ(mapQual, s3.MapQuality());
+
+ EXPECT_EQ(qStart, s3.QueryStart()); // 500
+ EXPECT_EQ(qEnd, s3.QueryEnd()); // QStart + seqLength
+ EXPECT_EQ(502, s3.AlignedStart()); // QStart + 2S
+ EXPECT_EQ(512, s3.AlignedEnd()); // AStart + 8= + 2I
+ EXPECT_EQ(100, s3.ReferenceStart()); // 100
+ EXPECT_EQ(111, s3.ReferenceEnd()); // RefStart + 8= + 3D
+
+ const BamRecordView view
+ {
+ s2,
+ Orientation::NATIVE,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+
+ EXPECT_EQ(seq, view.Sequence());
+ EXPECT_EQ(quals, view.Qualities().Fastq());
+ EXPECT_EQ(tagBases, view.DeletionTags());
+ EXPECT_EQ(tagQuals, view.DeletionQVs().Fastq());
+ EXPECT_EQ(tagQuals, view.LabelQVs().Fastq());
+ EXPECT_EQ(tagQuals, view.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames, view.IPD().Data());
+ }
+
+ { // s3 - REVERSE
+
+ EXPECT_TRUE(s3_rev.IsMapped());
+ EXPECT_EQ(0, s3_rev.ReferenceId());
+ EXPECT_EQ(Strand::REVERSE, s3_rev.AlignedStrand());
+ EXPECT_EQ(mapQual, s3_rev.MapQuality());
+
+ EXPECT_EQ(qStart, s3_rev.QueryStart()); // 500
+ EXPECT_EQ(qEnd, s3_rev.QueryEnd()); // QStart + seqLength
+ EXPECT_EQ(503, s3_rev.AlignedStart()); // QStart + 3S
+ EXPECT_EQ(513, s3_rev.AlignedEnd()); // AStart + 8= + 2I
+ EXPECT_EQ(100, s3_rev.ReferenceStart()); // 100
+ EXPECT_EQ(111, s3_rev.ReferenceEnd()); // RefStart + 8= + 3D
+
+ // - native
+ const BamRecordView nativeView
+ {
+ s3_rev,
+ Orientation::NATIVE,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+ EXPECT_EQ(seq, nativeView.Sequence());
+ EXPECT_EQ(quals, nativeView.Qualities().Fastq());
+ EXPECT_EQ(tagBases, nativeView.DeletionTags());
+ EXPECT_EQ(tagQuals, nativeView.DeletionQVs().Fastq());
+ EXPECT_EQ(tagQuals, nativeView.LabelQVs().Fastq());
+ EXPECT_EQ(tagQuals, nativeView.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames, nativeView.IPD().Data());
+
+ // - genomic
+ const BamRecordView genomicView
+ {
+ s3_rev,
+ Orientation::GENOMIC,
+ false,
+ false,
+ PulseBehavior::ALL
+ };
+ EXPECT_EQ(seq_rev, genomicView.Sequence());
+ EXPECT_EQ(quals_rev, genomicView.Qualities().Fastq());
+ EXPECT_EQ(tagBases_rev, genomicView.DeletionTags());
+ EXPECT_EQ(tagQuals_rev, genomicView.DeletionQVs().Fastq());
+ EXPECT_EQ(tagQuals_rev, genomicView.LabelQVs().Fastq());
+ EXPECT_EQ(tagQuals_rev, genomicView.AltLabelQVs().Fastq());
+ EXPECT_EQ(frames_rev, genomicView.IPD().Data());
+ }
+}
+
+// Checks the member-function form, BamRecord::Mapped(): the returned copy must
+// report the requested mapping and coordinates, and a native-orientation view
+// of it must still expose the per-base data the record was built from.
+TEST(BamRecordMappingTest, MappedCopy)
+{
+    // input record data
+    const Position qStart = 500;
+    const Position qEnd   = 510;
+    const string seq      = "AACCGTTAGC";
+    const string quals    = "?]?]?]?]?*";
+    const string tagBases = "AACCGTTAGC";
+    const string tagQuals = "?]?]?]?]?*";
+    const f_data frames   = { 10, 10, 20, 20, 30, 40, 40, 10, 30, 20 };
+
+    // mapping parameters
+    const int refId       = 0;
+    const int refStart    = 100;
+    const uint8_t mapQual = 80;
+    const string cigar    = "4=1D2I2D4=";   // 8= + 2I on the query, 8= + 3D on the reference
+
+    const BamRecord unmapped = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+    const BamRecord mapped   = unmapped.Mapped(refId, refStart, Strand::FORWARD, cigar, mapQual);
+
+    // mapping state
+    EXPECT_TRUE(mapped.IsMapped());
+    EXPECT_EQ(refId,           mapped.ReferenceId());
+    EXPECT_EQ(Strand::FORWARD, mapped.AlignedStrand());
+    EXPECT_EQ(mapQual,         mapped.MapQuality());
+
+    // coordinates
+    EXPECT_EQ(qStart,        mapped.QueryStart());      // 500
+    EXPECT_EQ(qEnd,          mapped.QueryEnd());        // 510
+    EXPECT_EQ(qStart,        mapped.AlignedStart());    // no clipping in the cigar
+    EXPECT_EQ(qEnd,          mapped.AlignedEnd());      // QStart + 8= + 2I
+    EXPECT_EQ(refStart,      mapped.ReferenceStart());  // 100
+    EXPECT_EQ(refStart + 11, mapped.ReferenceEnd());    // RefStart + 8= + 3D
+
+    // per-base data, read in native orientation
+    const BamRecordView nativeView{ mapped, Orientation::NATIVE, false, false, PulseBehavior::ALL };
+
+    EXPECT_EQ(seq,      nativeView.Sequence());
+    EXPECT_EQ(quals,    nativeView.Qualities().Fastq());
+    EXPECT_EQ(tagBases, nativeView.DeletionTags());
+    EXPECT_EQ(tagQuals, nativeView.DeletionQVs().Fastq());
+    EXPECT_EQ(tagQuals, nativeView.LabelQVs().Fastq());
+    EXPECT_EQ(tagQuals, nativeView.AltLabelQVs().Fastq());
+    EXPECT_EQ(frames,   nativeView.IPD().Data());
+}
+
+// Same scenario as the member-function variant, but going through the static
+// overload BamRecord::Mapped(record, ...): the result must report the
+// requested mapping and leave the per-base data unchanged in a native view.
+TEST(BamRecordMappingTest, StaticMapped)
+{
+    // input record data
+    const Position qStart = 500;
+    const Position qEnd   = 510;
+    const string seq      = "AACCGTTAGC";
+    const string quals    = "?]?]?]?]?*";
+    const string tagBases = "AACCGTTAGC";
+    const string tagQuals = "?]?]?]?]?*";
+    const f_data frames   = { 10, 10, 20, 20, 30, 40, 40, 10, 30, 20 };
+
+    // mapping parameters
+    const int refId       = 0;
+    const int refStart    = 100;
+    const uint8_t mapQual = 80;
+    const string cigar    = "4=1D2I2D4=";   // 8= + 2I on the query, 8= + 3D on the reference
+
+    const BamRecord source = tests::MakeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+    const BamRecord result = BamRecord::Mapped(source, refId, refStart, Strand::FORWARD, cigar, mapQual);
+
+    // mapping state
+    EXPECT_TRUE(result.IsMapped());
+    EXPECT_EQ(refId,           result.ReferenceId());
+    EXPECT_EQ(Strand::FORWARD, result.AlignedStrand());
+    EXPECT_EQ(mapQual,         result.MapQuality());
+
+    // coordinates
+    EXPECT_EQ(qStart,        result.QueryStart());      // 500
+    EXPECT_EQ(qEnd,          result.QueryEnd());        // 510
+    EXPECT_EQ(qStart,        result.AlignedStart());    // no clipping in the cigar
+    EXPECT_EQ(qEnd,          result.AlignedEnd());      // QStart + 8= + 2I
+    EXPECT_EQ(refStart,      result.ReferenceStart());  // 100
+    EXPECT_EQ(refStart + 11, result.ReferenceEnd());    // RefStart + 8= + 3D
+
+    // per-base data, read in native orientation
+    const BamRecordView nativeView{ result, Orientation::NATIVE, false, false, PulseBehavior::ALL };
+
+    EXPECT_EQ(seq,      nativeView.Sequence());
+    EXPECT_EQ(quals,    nativeView.Qualities().Fastq());
+    EXPECT_EQ(tagBases, nativeView.DeletionTags());
+    EXPECT_EQ(tagQuals, nativeView.DeletionQVs().Fastq());
+    EXPECT_EQ(tagQuals, nativeView.LabelQVs().Fastq());
+    EXPECT_EQ(tagQuals, nativeView.AltLabelQVs().Fastq());
+    EXPECT_EQ(frames,   nativeView.IPD().Data());
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/BamHeader.h>
+#include <pbbam/BamRecord.h>
+#include <pbbam/BamWriter.h>
+#include <pbbam/EntireFileQuery.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Round trip through BamWriter: a header and one hand-built, unmapped record
+// are written to a generated BAM file, which is then reopened so the header
+// fields and the record contents can be compared with what was written.
+TEST(BamWriterTest, SingleWrite_UserRecord)
+{
+    const string fullName = "test/100/0_5";
+    const string rgId     = "6002b307";
+    const vector<float> expectedSnr = {0.2,0.2,0.2,0.2};
+
+    // header: one @HD line and one @RG line whose ID matches rgId
+    const string hdrText =
+        "@HD\tVN:1.1\tSO:unknown\tpb:3.0.1\n"
+        "@RG\tID:6002b307\tPL:PACBIO\tDS:READTYPE=SUBREAD;BINDINGKIT=100-619-300;"
+        "SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100\t"
+        "PU:test\tPM:SEQUEL\n";
+    BamHeader inputHeader(hdrText);
+
+    // record: core fields are set directly on the underlying implementation
+    BamRecord bamRecord(inputHeader);
+    auto& impl = bamRecord.impl_;
+    impl.Name(fullName);
+    impl.SetSequenceAndQualities("ACGTC", 5);
+    impl.CigarData("");
+    impl.Bin(0);
+    impl.Flag(0);
+    impl.InsertSize(0);
+    impl.MapQuality(0);
+    impl.MatePosition(-1);
+    impl.MateReferenceId(-1);
+    impl.Position(-1);
+    impl.ReferenceId(-1);
+    impl.SetMapped(false);
+
+    // record: tags
+    TagCollection recordTags;
+    recordTags["zm"] = static_cast<int32_t>(100);
+    recordTags["qs"] = static_cast<Position>(0);
+    recordTags["qe"] = static_cast<Position>(5);
+    recordTags["np"] = static_cast<int32_t>(1);
+    recordTags["rq"] = static_cast<float>(0.6);
+    recordTags["RG"] = rgId;
+    recordTags["sn"] = expectedSnr;
+    impl.Tags(recordTags);
+
+    // write: the writer lives in its own scope, so it is destroyed before the
+    // file is reopened below (presumably what finalizes the output - confirm)
+    const string generatedBamFn = tests::GeneratedData_Dir + "/bamwriter_generated.bam";
+    {
+        BamWriter writer(generatedBamFn, inputHeader);
+        writer.Write(bamRecord);
+    }
+
+    // read back: header
+    BamFile writtenFile(generatedBamFn);
+    const auto writtenHeader = writtenFile.Header();
+    EXPECT_EQ(std::string("1.1"),     writtenHeader.Version());
+    EXPECT_EQ(std::string("unknown"), writtenHeader.SortOrder());
+    EXPECT_EQ(std::string("3.0.1"),   writtenHeader.PacBioBamVersion());
+
+    // read back: first record
+    EntireFileQuery allRecords(writtenFile);
+    auto recordIter = allRecords.begin();
+    auto readBack = *recordIter;
+    EXPECT_EQ(std::string("ACGTC"), readBack.Sequence());
+    EXPECT_EQ(fullName,             readBack.FullName());
+
+    EXPECT_TRUE(readBack.HasHoleNumber());
+    EXPECT_TRUE(readBack.HasNumPasses());
+    EXPECT_TRUE(readBack.HasQueryEnd());
+    EXPECT_TRUE(readBack.HasQueryStart());
+    EXPECT_TRUE(readBack.HasReadAccuracy());
+    EXPECT_TRUE(readBack.HasSignalToNoise());
+
+    EXPECT_EQ(100, readBack.HoleNumber());
+    EXPECT_EQ(1,   readBack.NumPasses());
+    EXPECT_EQ(0,   readBack.QueryStart());
+    EXPECT_EQ(5,   readBack.QueryEnd());
+    EXPECT_EQ(expectedSnr, readBack.SignalToNoise());
+    EXPECT_EQ(rgId,        readBack.ReadGroupId());
+
+    // clean up the generated file
+    remove(generatedBamFn.c_str());
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/BarcodeQuery.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Placeholder for BarcodeQuery coverage: the body holds no assertions yet, so
+// this test passes without exercising anything.
+TEST(BarcodeQueryTest, QueryOk)
+{
+ // TODO: come back and add real checks once barcoded test data is available
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/Cigar.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Each CIGAR operation type must convert to its one-character code.
+// NOTE(review): the test name looks like a typo of "TypeToChar"; it is kept
+// as-is here so existing test filters keep matching.
+TEST(CigarTest, TypeToCar)
+{
+    using OpType = CigarOperationType;
+
+    EXPECT_EQ('M', CigarOperation::TypeToChar(OpType::ALIGNMENT_MATCH));
+    EXPECT_EQ('I', CigarOperation::TypeToChar(OpType::INSERTION));
+    EXPECT_EQ('D', CigarOperation::TypeToChar(OpType::DELETION));
+    EXPECT_EQ('N', CigarOperation::TypeToChar(OpType::REFERENCE_SKIP));
+    EXPECT_EQ('S', CigarOperation::TypeToChar(OpType::SOFT_CLIP));
+    EXPECT_EQ('H', CigarOperation::TypeToChar(OpType::HARD_CLIP));
+    EXPECT_EQ('P', CigarOperation::TypeToChar(OpType::PADDING));
+    EXPECT_EQ('=', CigarOperation::TypeToChar(OpType::SEQUENCE_MATCH));
+    EXPECT_EQ('X', CigarOperation::TypeToChar(OpType::SEQUENCE_MISMATCH));
+}
+
+// Each one-character CIGAR code must convert to its operation type.
+TEST(CigarTest, CharToType)
+{
+    using OpType = CigarOperationType;
+
+    EXPECT_EQ(OpType::ALIGNMENT_MATCH,   CigarOperation::CharToType('M'));
+    EXPECT_EQ(OpType::INSERTION,         CigarOperation::CharToType('I'));
+    EXPECT_EQ(OpType::DELETION,          CigarOperation::CharToType('D'));
+    EXPECT_EQ(OpType::REFERENCE_SKIP,    CigarOperation::CharToType('N'));
+    EXPECT_EQ(OpType::SOFT_CLIP,         CigarOperation::CharToType('S'));
+    EXPECT_EQ(OpType::HARD_CLIP,         CigarOperation::CharToType('H'));
+    EXPECT_EQ(OpType::PADDING,           CigarOperation::CharToType('P'));
+    EXPECT_EQ(OpType::SEQUENCE_MATCH,    CigarOperation::CharToType('='));
+    EXPECT_EQ(OpType::SEQUENCE_MISMATCH, CigarOperation::CharToType('X'));
+}
+
+// Setting the type on an operation must be reflected by its Char() accessor.
+// NOTE(review): judging by the body, this test's name and that of
+// SetTypeYieldsCorrectOperation may be swapped; names are left untouched.
+TEST(CigarTest, SetOperationYieldsCorrectType)
+{
+    using OpType = CigarOperationType;
+
+    // builds a fresh operation, sets its type, and reports the resulting character
+    const auto charAfterSettingType = [](const OpType type) -> char {
+        CigarOperation op;
+        op.Type(type);
+        return op.Char();
+    };
+
+    EXPECT_EQ('M', charAfterSettingType(OpType::ALIGNMENT_MATCH));
+    EXPECT_EQ('I', charAfterSettingType(OpType::INSERTION));
+    EXPECT_EQ('D', charAfterSettingType(OpType::DELETION));
+    EXPECT_EQ('N', charAfterSettingType(OpType::REFERENCE_SKIP));
+    EXPECT_EQ('S', charAfterSettingType(OpType::SOFT_CLIP));
+    EXPECT_EQ('H', charAfterSettingType(OpType::HARD_CLIP));
+    EXPECT_EQ('P', charAfterSettingType(OpType::PADDING));
+    EXPECT_EQ('=', charAfterSettingType(OpType::SEQUENCE_MATCH));
+    EXPECT_EQ('X', charAfterSettingType(OpType::SEQUENCE_MISMATCH));
+}
+
+// Setting the character on an operation must be reflected by its Type() accessor.
+TEST(CigarTest, SetTypeYieldsCorrectOperation)
+{
+    using OpType = CigarOperationType;
+
+    // builds a fresh operation, sets its character, and reports the resulting type
+    const auto typeAfterSettingChar = [](const char code) -> OpType {
+        CigarOperation op;
+        op.Char(code);
+        return op.Type();
+    };
+
+    EXPECT_EQ(OpType::ALIGNMENT_MATCH,   typeAfterSettingChar('M'));
+    EXPECT_EQ(OpType::INSERTION,         typeAfterSettingChar('I'));
+    EXPECT_EQ(OpType::DELETION,          typeAfterSettingChar('D'));
+    EXPECT_EQ(OpType::REFERENCE_SKIP,    typeAfterSettingChar('N'));
+    EXPECT_EQ(OpType::SOFT_CLIP,         typeAfterSettingChar('S'));
+    EXPECT_EQ(OpType::HARD_CLIP,         typeAfterSettingChar('H'));
+    EXPECT_EQ(OpType::PADDING,           typeAfterSettingChar('P'));
+    EXPECT_EQ(OpType::SEQUENCE_MATCH,    typeAfterSettingChar('='));
+    EXPECT_EQ(OpType::SEQUENCE_MISMATCH, typeAfterSettingChar('X'));
+}
+
+// Parsing an empty string must produce a CIGAR with no operations.
+TEST(CigarStringTest, FromStdString_Empty)
+{
+    const string noText = "";
+    Cigar parsed = Cigar::FromStdString(noText);
+
+    EXPECT_TRUE(parsed.empty());
+}
+
+// A one-operation CIGAR string must parse into exactly that operation.
+//
+// Comparisons use ASSERT_EQ/EXPECT_EQ instead of *_TRUE(a == b) so that a
+// failure reports both the expected and the actual value.
+TEST(CigarStringTest, FromStdString_SingleOp)
+{
+    const string singleCigar = "100=";
+
+    Cigar cigar = Cigar::FromStdString(singleCigar);
+    ASSERT_EQ(1u, cigar.size());   // fatal on mismatch: front() below needs an element
+
+    const CigarOperation& op = cigar.front();
+    EXPECT_EQ('=',  op.Char());
+    EXPECT_EQ(100u, op.Length());
+}
+
+// A multi-operation CIGAR string must parse into the same operations, in the
+// same order, with the same lengths.
+//
+// The expectations live in a table and are compared with EXPECT_EQ, so a
+// failure reports the offending index plus both the expected and actual value
+// (the previous EXPECT_TRUE(a == b) form only reported "false").
+TEST(CigarStringTest, FromStdString_MultipleOps)
+{
+    const string multiCigar = "100=2D34I6=6X6=";
+
+    const struct { char code; unsigned int length; } expectedOps[] = {
+        { '=', 100 },
+        { 'D',   2 },
+        { 'I',  34 },
+        { '=',   6 },
+        { 'X',   6 },
+        { '=',   6 }
+    };
+    const size_t numExpected = sizeof(expectedOps) / sizeof(expectedOps[0]);
+
+    Cigar cigar = Cigar::FromStdString(multiCigar);
+    ASSERT_EQ(numExpected, cigar.size());   // fatal on mismatch: at(i) below would throw
+
+    for (size_t i = 0; i < numExpected; ++i) {
+        const CigarOperation op = cigar.at(i);
+        EXPECT_EQ(expectedOps[i].code,   op.Char())   << "operation index " << i;
+        EXPECT_EQ(expectedOps[i].length, op.Length()) << "operation index " << i;
+    }
+}
+
+// A CIGAR with no operations must serialize to an empty string.
+TEST(CigarStringTest, ToStdString_Empty)
+{
+    Cigar noOperations;
+    const string expectedText;
+
+    EXPECT_EQ(expectedText, noOperations.ToStdString());
+}
+
+// A CIGAR holding a single operation must serialize to "<length><code>".
+TEST(CigarStringTest, ToStdString_SingleOp)
+{
+    const string expectedText = "100=";
+
+    Cigar oneOp;
+    oneOp.push_back(CigarOperation(CigarOperationType::SEQUENCE_MATCH, 100));
+
+    EXPECT_EQ(expectedText, oneOp.ToStdString());
+}
+
+// A CIGAR holding several operations must serialize them back-to-back, in
+// insertion order.
+TEST(CigarStringTest, ToStdString_MultipleOps)
+{
+    using OpType = CigarOperationType;
+    const string expectedText = "100=2D34I6=6X6=";
+
+    Cigar ops;
+    ops.push_back(CigarOperation(OpType::SEQUENCE_MATCH,    100));
+    ops.push_back(CigarOperation(OpType::DELETION,            2));
+    ops.push_back(CigarOperation(OpType::INSERTION,          34));
+    ops.push_back(CigarOperation(OpType::SEQUENCE_MATCH,      6));
+    ops.push_back(CigarOperation(OpType::SEQUENCE_MISMATCH,   6));
+    ops.push_back(CigarOperation(OpType::SEQUENCE_MATCH,      6));
+
+    EXPECT_EQ(expectedText, ops.ToStdString());
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/Compare.h>
+#include <algorithm>
+#include <string>
+#include <utility>
+#include <vector>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace tests {
+
+// Builds a fresh record and attaches one tag (tagName -> tag) to it.
+static inline
+BamRecord makeRecordWithTag(const string& tagName,
+                            const Tag& tag)
+{
+    BamRecord record{ };
+    record.Impl().AddTag(tagName, tag);
+    return record;
+}
+
+// Builds an unmapped record from the given sequence/qualities and fills in its
+// tags: query start/end ("qs"/"qe"), the frame tags "ip" and "pw" (both from
+// frames), the base tags "dt" and "st" (both from tagBases), and the quality
+// tags "dq", "iq", "mq", "sq", "pq", "pv" (all from tagQuals).
+static
+BamRecord makeRecord(const Position qStart,
+                     const Position qEnd,
+                     const string& seq,
+                     const string& quals,
+                     const string& tagBases,
+                     const string& tagQuals,
+                     const vector<uint16_t>& frames)
+{
+    TagCollection recordTags;
+    recordTags["qs"] = qStart;
+    recordTags["qe"] = qEnd;
+    for (const char* frameTag : { "ip", "pw" }) {
+        recordTags[frameTag] = frames;
+    }
+    for (const char* baseTag : { "dt", "st" }) {
+        recordTags[baseTag] = tagBases;
+    }
+    for (const char* qualTag : { "dq", "iq", "mq", "sq", "pq", "pv" }) {
+        recordTags[qualTag] = tagQuals;
+    }
+
+    BamRecordImpl rawRecord;
+    rawRecord.SetSequenceAndQualities(seq, quals);
+    rawRecord.Tags(recordTags);
+
+    return BamRecord(std::move(rawRecord));
+}
+
+// Returns six mapped records built from identical input data. The first three
+// are mapped on the forward strand with the cigars "10=", "5=3D5=" and
+// "4=1D2I2D2X2=" (in that order); the last three use the same cigars on the
+// reverse strand. All are mapped with the same leading arguments (0, 100) and
+// map quality 80.
+static
+std::vector<BamRecord> makeMappedRecords(void)
+{
+    const Position qStart = 500;
+    const Position qEnd   = 510;
+    const string seq      = "AACCGTTAGC";
+    const string quals    = "?]?]?]?]?*";
+    const string tagBases = "AACCGTTAGC";
+    const string tagQuals = "?]?]?]?]?*";
+    const vector<uint16_t> frames = { 10, 10, 20, 20, 30, 40, 40, 10, 30, 20 };
+    const uint8_t mapQual = 80;
+
+    const string cigars[] = { "10=", "5=3D5=", "4=1D2I2D2X2=" };
+
+    std::vector<BamRecord> mappedRecords;
+    for (const auto strand : { Strand::FORWARD, Strand::REVERSE }) {
+        for (const string& cigar : cigars) {
+            BamRecord record = tests::makeRecord(qStart, qEnd, seq, quals, tagBases, tagQuals, frames);
+            record.Map(0, 100, strand, cigar, mapQual);
+            mappedRecords.push_back(record);
+        }
+    }
+    return mappedRecords;
+}
+
+} // namespace tests
+
+// Every comparison type must yield its fully qualified name; a value outside
+// the enumeration must throw std::runtime_error.
+TEST(CompareTest, TypeToNameOk)
+{
+    const struct { Compare::Type type; const char* name; } expectedNames[] = {
+        { Compare::EQUAL,              "Compare::EQUAL" },
+        { Compare::NOT_EQUAL,          "Compare::NOT_EQUAL" },
+        { Compare::LESS_THAN,          "Compare::LESS_THAN" },
+        { Compare::LESS_THAN_EQUAL,    "Compare::LESS_THAN_EQUAL" },
+        { Compare::GREATER_THAN,       "Compare::GREATER_THAN" },
+        { Compare::GREATER_THAN_EQUAL, "Compare::GREATER_THAN_EQUAL" },
+        { Compare::CONTAINS,           "Compare::CONTAINS" },
+        { Compare::NOT_CONTAINS,       "Compare::NOT_CONTAINS" }
+    };
+
+    for (const auto& entry : expectedNames) {
+        EXPECT_EQ(string(entry.name), Compare::TypeToName(entry.type));
+    }
+
+    // invalid type throws
+    EXPECT_THROW(Compare::TypeToName(static_cast<Compare::Type>(42)), std::runtime_error);
+}
+
+// Every comparison type must yield its operator text, both in the default
+// symbolic form and in the alphabetic form selected by passing true as the
+// second argument; a value outside the enumeration must throw.
+TEST(CompareTest, TypeToOperatorOk)
+{
+    const struct { Compare::Type type; const char* symbol; const char* alpha; } operators[] = {
+        { Compare::EQUAL,              "==", "eq"  },
+        { Compare::NOT_EQUAL,          "!=", "ne"  },
+        { Compare::LESS_THAN,          "<",  "lt"  },
+        { Compare::LESS_THAN_EQUAL,    "<=", "lte" },
+        { Compare::GREATER_THAN,       ">",  "gt"  },
+        { Compare::GREATER_THAN_EQUAL, ">=", "gte" },
+        { Compare::CONTAINS,           "&",  "and" },
+        { Compare::NOT_CONTAINS,       "~",  "not" }
+    };
+
+    // normal
+    for (const auto& entry : operators) {
+        EXPECT_EQ(Compare::TypeToOperator(entry.type), string(entry.symbol));
+    }
+
+    // alpha
+    for (const auto& entry : operators) {
+        EXPECT_EQ(Compare::TypeToOperator(entry.type, true), string(entry.alpha));
+    }
+
+    // invalid type throws
+    EXPECT_THROW(Compare::TypeToOperator(static_cast<Compare::Type>(42)), std::runtime_error);
+}
+
+// Every accepted operator spelling must resolve to its comparison type;
+// an empty or unknown spelling must throw std::runtime_error.
+//
+// NOTE(review): "<", "<=", ">" and ">=" each appear twice in the table below.
+// The repeats are kept so the set of checks is unchanged, but they may be
+// stand-ins for other spellings (e.g. escaped forms) - confirm with the
+// Compare implementation.
+TEST(CompareTest, FromOperatorOk)
+{
+    const struct { const char* text; Compare::Type expected; } spellings[] = {
+        { "==",  Compare::EQUAL },
+        { "=",   Compare::EQUAL },
+        { "eq",  Compare::EQUAL },
+        { "!=",  Compare::NOT_EQUAL },
+        { "ne",  Compare::NOT_EQUAL },
+        { "<",   Compare::LESS_THAN },
+        { "lt",  Compare::LESS_THAN },
+        { "<",   Compare::LESS_THAN },
+        { "<=",  Compare::LESS_THAN_EQUAL },
+        { "lte", Compare::LESS_THAN_EQUAL },
+        { "<=",  Compare::LESS_THAN_EQUAL },
+        { ">",   Compare::GREATER_THAN },
+        { "gt",  Compare::GREATER_THAN },
+        { ">",   Compare::GREATER_THAN },
+        { ">=",  Compare::GREATER_THAN_EQUAL },
+        { "gte", Compare::GREATER_THAN_EQUAL },
+        { ">=",  Compare::GREATER_THAN_EQUAL },
+        { "&",   Compare::CONTAINS },
+        { "~",   Compare::NOT_CONTAINS }
+    };
+
+    for (const auto& entry : spellings) {
+        EXPECT_EQ(entry.expected, Compare::TypeFromOperator(entry.text)) << "operator: " << entry.text;
+    }
+
+    // invalid operator strings throw
+    EXPECT_THROW(Compare::TypeFromOperator(""), std::runtime_error);
+    EXPECT_THROW(Compare::TypeFromOperator("invalid"), std::runtime_error);
+}
+
+TEST(CompareTest, AlignedEndOk)
+{
+    // Four records with their alignedEnd_ field set directly, deliberately out of order.
+    BamRecord end300;
+    end300.alignedEnd_ = 300;
+    BamRecord end200;
+    end200.alignedEnd_ = 200;
+    BamRecord end400;
+    end400.alignedEnd_ = 400;
+    BamRecord end100;
+    end100.alignedEnd_ = 100;
+
+    vector<BamRecord> sorted{ end300, end200, end400, end100 };
+    std::sort(sorted.begin(), sorted.end(), Compare::AlignedEnd());
+
+    // Compare::AlignedEnd is expected to yield ascending order: 100, 200, 300, 400.
+    EXPECT_EQ(end100.alignedEnd_, sorted.at(0).AlignedEnd());
+    EXPECT_EQ(end200.alignedEnd_, sorted.at(1).AlignedEnd());
+    EXPECT_EQ(end300.alignedEnd_, sorted.at(2).AlignedEnd());
+    EXPECT_EQ(end400.alignedEnd_, sorted.at(3).AlignedEnd());
+}
+
+TEST(CompareTest, AlignedStartOk)
+{
+    // Four records with their alignedStart_ field set directly, deliberately out of order.
+    BamRecord start300;
+    start300.alignedStart_ = 300;
+    BamRecord start200;
+    start200.alignedStart_ = 200;
+    BamRecord start400;
+    start400.alignedStart_ = 400;
+    BamRecord start100;
+    start100.alignedStart_ = 100;
+
+    vector<BamRecord> sorted{ start300, start200, start400, start100 };
+    std::sort(sorted.begin(), sorted.end(), Compare::AlignedStart());
+
+    // Compare::AlignedStart is expected to yield ascending order: 100, 200, 300, 400.
+    EXPECT_EQ(start100.alignedStart_, sorted.at(0).AlignedStart());
+    EXPECT_EQ(start200.alignedStart_, sorted.at(1).AlignedStart());
+    EXPECT_EQ(start300.alignedStart_, sorted.at(2).AlignedStart());
+    EXPECT_EQ(start400.alignedStart_, sorted.at(3).AlignedStart());
+}
+
+TEST(CompareTest, AlignedStrandOk)
+{
+    // Alternate reverse / forward so the input starts out interleaved.
+    BamRecord reverseA;
+    reverseA.Impl().SetReverseStrand(true);
+    BamRecord forwardA;
+    forwardA.Impl().SetReverseStrand(false);
+    BamRecord reverseB;
+    reverseB.Impl().SetReverseStrand(true);
+    BamRecord forwardB;
+    forwardB.Impl().SetReverseStrand(false);
+
+    vector<BamRecord> sorted{ reverseA, forwardA, reverseB, forwardB };
+    std::sort(sorted.begin(), sorted.end(), Compare::AlignedStrand());
+
+    // After sorting, both forward-strand records must precede both reverse-strand records.
+    EXPECT_EQ(Strand::FORWARD, sorted.at(0).AlignedStrand());
+    EXPECT_EQ(Strand::FORWARD, sorted.at(1).AlignedStrand());
+    EXPECT_EQ(Strand::REVERSE, sorted.at(2).AlignedStrand());
+    EXPECT_EQ(Strand::REVERSE, sorted.at(3).AlignedStrand());
+}
+
+TEST(CompareTest, BarcodeForwardOk)
+{
+    // Pair passed to Barcodes(); the first member is the value checked via BarcodeForward().
+    typedef std::pair<int16_t, int16_t> BarcodePair;
+
+    BamRecord fwd30;
+    fwd30.Barcodes(BarcodePair(30, 20));
+    BamRecord fwd20;
+    fwd20.Barcodes(BarcodePair(20, 30));
+    BamRecord fwd40;
+    fwd40.Barcodes(BarcodePair(40, 10));
+    BamRecord fwd10;
+    fwd10.Barcodes(BarcodePair(10, 40));
+
+    vector<BamRecord> sorted{ fwd30, fwd20, fwd40, fwd10 };
+    std::sort(sorted.begin(), sorted.end(), Compare::BarcodeForward());
+
+    // Expected order of forward barcodes: 10, 20, 30, 40.
+    EXPECT_EQ(fwd10.BarcodeForward(), sorted.at(0).BarcodeForward());
+    EXPECT_EQ(fwd20.BarcodeForward(), sorted.at(1).BarcodeForward());
+    EXPECT_EQ(fwd30.BarcodeForward(), sorted.at(2).BarcodeForward());
+    EXPECT_EQ(fwd40.BarcodeForward(), sorted.at(3).BarcodeForward());
+}
+
+TEST(CompareTest, BarcodeReverseOk)
+{
+    // Pair passed to Barcodes(); the second member is the value checked via BarcodeReverse().
+    typedef std::pair<int16_t, int16_t> BarcodePair;
+
+    BamRecord rev20;
+    rev20.Barcodes(BarcodePair(30, 20));
+    BamRecord rev30;
+    rev30.Barcodes(BarcodePair(20, 30));
+    BamRecord rev10;
+    rev10.Barcodes(BarcodePair(40, 10));
+    BamRecord rev40;
+    rev40.Barcodes(BarcodePair(10, 40));
+
+    vector<BamRecord> sorted{ rev20, rev30, rev10, rev40 };
+    std::sort(sorted.begin(), sorted.end(), Compare::BarcodeReverse());
+
+    // Expected order of reverse barcodes: 10, 20, 30, 40.
+    EXPECT_EQ(rev10.BarcodeReverse(), sorted.at(0).BarcodeReverse());
+    EXPECT_EQ(rev20.BarcodeReverse(), sorted.at(1).BarcodeReverse());
+    EXPECT_EQ(rev30.BarcodeReverse(), sorted.at(2).BarcodeReverse());
+    EXPECT_EQ(rev40.BarcodeReverse(), sorted.at(3).BarcodeReverse());
+}
+
+TEST(CompareTest, BarcodeQualityOk)
+{
+    // Quality values stored in the "bq" tag, listed in insertion order.
+    uint8_t quality30 = 30;
+    uint8_t quality20 = 20;
+    uint8_t quality40 = 40;
+    uint8_t quality10 = 10;
+
+    vector<BamRecord> sorted;
+    sorted.push_back(tests::makeRecordWithTag("bq", Tag(quality30)));
+    sorted.push_back(tests::makeRecordWithTag("bq", Tag(quality20)));
+    sorted.push_back(tests::makeRecordWithTag("bq", Tag(quality40)));
+    sorted.push_back(tests::makeRecordWithTag("bq", Tag(quality10)));
+
+    std::sort(sorted.begin(), sorted.end(), Compare::BarcodeQuality());
+
+    // Expected ascending order: 10, 20, 30, 40.
+    EXPECT_EQ(quality10, sorted.at(0).BarcodeQuality());
+    EXPECT_EQ(quality20, sorted.at(1).BarcodeQuality());
+    EXPECT_EQ(quality30, sorted.at(2).BarcodeQuality());
+    EXPECT_EQ(quality40, sorted.at(3).BarcodeQuality());
+}
+
+TEST(CompareTest, CustomCompareOk)
+{
+    // User-defined comparator built from the generic member-function adapter:
+    // orders records by the boolean result of BamRecord::HasDeletionTag.
+    struct CustomCompare : public Compare::MemberFunctionBase<bool, &BamRecord::HasDeletionTag> { };
+
+    // Four records carrying a "dt" tag, followed by four default-constructed records.
+    vector<BamRecord> records;
+    for (int n = 0; n < 4; ++n) {
+        records.push_back(tests::makeRecordWithTag("dt", Tag(string("foo"))));
+    }
+    for (int n = 0; n < 4; ++n) {
+        records.push_back(BamRecord());
+    }
+    EXPECT_EQ(8, records.size());
+
+    std::sort(records.begin(), records.end(), CustomCompare());
+
+    // Records without the tag (false) are expected first, tagged records (true) last.
+    for (size_t idx = 0; idx < 4; ++idx) {
+        EXPECT_FALSE(records.at(idx).HasDeletionTag());
+    }
+    for (size_t idx = 4; idx < 8; ++idx) {
+        EXPECT_TRUE(records.at(idx).HasDeletionTag());
+    }
+}
+
+TEST(CompareTest, FullNameOk)
+{
+    // Record names assigned out of alphabetical order.
+    BamRecord nameC;
+    nameC.Impl().Name("c");
+    BamRecord nameB;
+    nameB.Impl().Name("b");
+    BamRecord nameD;
+    nameD.Impl().Name("d");
+    BamRecord nameA;
+    nameA.Impl().Name("a");
+
+    vector<BamRecord> sorted{ nameC, nameB, nameD, nameA };
+    std::sort(sorted.begin(), sorted.end(), Compare::FullName());
+
+    // Expected order of names: a, b, c, d.
+    BamRecord* const ascending[] = { &nameA, &nameB, &nameC, &nameD };
+    for (size_t idx = 0; idx < 4; ++idx) {
+        EXPECT_EQ(ascending[idx]->FullName(), sorted.at(idx).FullName());
+    }
+}
+
+TEST(CompareTest, LocalContextFlagOk)
+{
+    // One record per flag value, inserted out of the order expected after sorting.
+    BamRecord barcodeAfter;
+    barcodeAfter.LocalContextFlags(LocalContextFlags::BARCODE_AFTER);
+    BamRecord adapterAfter;
+    adapterAfter.LocalContextFlags(LocalContextFlags::ADAPTER_AFTER);
+    BamRecord reversePass;
+    reversePass.LocalContextFlags(LocalContextFlags::REVERSE_PASS);
+    BamRecord noContext;
+    noContext.LocalContextFlags(LocalContextFlags::NO_LOCAL_CONTEXT);
+
+    vector<BamRecord> sorted{ barcodeAfter, adapterAfter, reversePass, noContext };
+    std::sort(sorted.begin(), sorted.end(), Compare::LocalContextFlag());
+
+    // Expected order: NO_LOCAL_CONTEXT, ADAPTER_AFTER, BARCODE_AFTER, REVERSE_PASS.
+    EXPECT_EQ(noContext.LocalContextFlags(),    sorted.at(0).LocalContextFlags());
+    EXPECT_EQ(adapterAfter.LocalContextFlags(), sorted.at(1).LocalContextFlags());
+    EXPECT_EQ(barcodeAfter.LocalContextFlags(), sorted.at(2).LocalContextFlags());
+    EXPECT_EQ(reversePass.LocalContextFlags(),  sorted.at(3).LocalContextFlags());
+}
+
+TEST(CompareTest, MapQualityOk)
+{
+    // Mapping qualities assigned out of order.
+    BamRecord mapq30;
+    mapq30.Impl().MapQuality(30);
+    BamRecord mapq20;
+    mapq20.Impl().MapQuality(20);
+    BamRecord mapq40;
+    mapq40.Impl().MapQuality(40);
+    BamRecord mapq10;
+    mapq10.Impl().MapQuality(10);
+
+    vector<BamRecord> sorted{ mapq30, mapq20, mapq40, mapq10 };
+    std::sort(sorted.begin(), sorted.end(), Compare::MapQuality());
+
+    // Expected ascending order: 10, 20, 30, 40.
+    BamRecord* const ascending[] = { &mapq10, &mapq20, &mapq30, &mapq40 };
+    for (size_t idx = 0; idx < 4; ++idx) {
+        EXPECT_EQ(ascending[idx]->MapQuality(), sorted.at(idx).MapQuality());
+    }
+}
+
+TEST(CompareTest, MovieNameOk)
+{
+    // Four read groups whose first constructor argument is "a" .. "d".
+    ReadGroupInfo groupA{ "a", "SUBREAD" };
+    ReadGroupInfo groupB{ "b", "SUBREAD" };
+    ReadGroupInfo groupC{ "c", "SUBREAD" };
+    ReadGroupInfo groupD{ "d", "SUBREAD" };
+
+    // All read groups are registered in the header shared by every record.
+    BamHeader header;
+    header.AddReadGroup(groupA)
+          .AddReadGroup(groupB)
+          .AddReadGroup(groupC)
+          .AddReadGroup(groupD);
+
+    // Records are attached to the read groups out of order: c, b, d, a.
+    BamRecord inGroupC(header);
+    inGroupC.ReadGroup(groupC);
+    BamRecord inGroupB(header);
+    inGroupB.ReadGroup(groupB);
+    BamRecord inGroupD(header);
+    inGroupD.ReadGroup(groupD);
+    BamRecord inGroupA(header);
+    inGroupA.ReadGroup(groupA);
+
+    vector<BamRecord> sorted{ inGroupC, inGroupB, inGroupD, inGroupA };
+    std::sort(sorted.begin(), sorted.end(), Compare::MovieName());
+
+    // Expected order after sorting by movie name: a, b, c, d.
+    EXPECT_EQ(inGroupA.MovieName(), sorted.at(0).MovieName());
+    EXPECT_EQ(inGroupB.MovieName(), sorted.at(1).MovieName());
+    EXPECT_EQ(inGroupC.MovieName(), sorted.at(2).MovieName());
+    EXPECT_EQ(inGroupD.MovieName(), sorted.at(3).MovieName());
+}
+
+TEST(CompareTest, NoneOk)
+{
+    // Records named in a deliberately non-alphabetical order: c, b, d, a.
+    BamRecord r1; r1.Impl().Name("c");
+    BamRecord r2; r2.Impl().Name("b");
+    BamRecord r3; r3.Impl().Name("d");
+    BamRecord r4; r4.Impl().Name("a");
+
+    auto records = vector<BamRecord>{ r1, r2, r3, r4 };
+
+    // This test expects Compare::None to leave the input order untouched.
+    // std::sort gives no ordering guarantee for elements the comparator treats as
+    // equivalent, so std::stable_sort is used: it is required to keep equivalent
+    // elements in their original relative order.
+    // NOTE(review): assumes Compare::None never reports one record as less than
+    // another (its definition is not visible here) - confirm.
+    std::stable_sort(records.begin(), records.end(), Compare::None());
+
+    // Order must be exactly the insertion order.
+    EXPECT_EQ(r1.FullName(), records.at(0).FullName());
+    EXPECT_EQ(r2.FullName(), records.at(1).FullName());
+    EXPECT_EQ(r3.FullName(), records.at(2).FullName());
+    EXPECT_EQ(r4.FullName(), records.at(3).FullName());
+}
+
+TEST(CompareTest, NumDeletedBasesOk)
+{
+    // Deleted-base counts per record: as delivered by the fixture, and after sorting.
+    const int fixtureCounts[] = { 0, 3, 3, 0, 3, 3 };
+    const int sortedCounts[]  = { 0, 0, 3, 3, 3, 3 };
+
+    auto mapped = tests::makeMappedRecords();
+
+    // sanity-check the fixture before relying on it
+    EXPECT_EQ(6, mapped.size());
+    for (size_t idx = 0; idx < 6; ++idx) {
+        EXPECT_EQ(fixtureCounts[idx], mapped.at(idx).NumDeletedBases());
+    }
+
+    // sort, then verify ascending counts
+    std::sort(mapped.begin(), mapped.end(), Compare::NumDeletedBases());
+    for (size_t idx = 0; idx < 6; ++idx) {
+        EXPECT_EQ(sortedCounts[idx], mapped.at(idx).NumDeletedBases());
+    }
+}
+
+TEST(CompareTest, NumInsertedBasesOk)
+{
+    // Inserted-base counts per record: as delivered by the fixture, and after sorting.
+    const int fixtureCounts[] = { 0, 0, 2, 0, 0, 2 };
+    const int sortedCounts[]  = { 0, 0, 0, 0, 2, 2 };
+
+    auto mapped = tests::makeMappedRecords();
+
+    // sanity-check the fixture before relying on it
+    EXPECT_EQ(6, mapped.size());
+    for (size_t idx = 0; idx < 6; ++idx) {
+        EXPECT_EQ(fixtureCounts[idx], mapped.at(idx).NumInsertedBases());
+    }
+
+    // sort, then verify ascending counts
+    std::sort(mapped.begin(), mapped.end(), Compare::NumInsertedBases());
+    for (size_t idx = 0; idx < 6; ++idx) {
+        EXPECT_EQ(sortedCounts[idx], mapped.at(idx).NumInsertedBases());
+    }
+}
+
+TEST(CompareTest, NumMatchesOk)
+{
+    // Match counts per record: as delivered by the fixture, and after sorting.
+    const int fixtureCounts[] = { 10, 10, 6, 10, 10, 6 };
+    const int sortedCounts[]  = { 6, 6, 10, 10, 10, 10 };
+
+    auto mapped = tests::makeMappedRecords();
+
+    // sanity-check the fixture before relying on it
+    EXPECT_EQ(6, mapped.size());
+    for (size_t idx = 0; idx < 6; ++idx) {
+        EXPECT_EQ(fixtureCounts[idx], mapped.at(idx).NumMatches());
+    }
+
+    // sort, then verify ascending counts
+    std::sort(mapped.begin(), mapped.end(), Compare::NumMatches());
+    for (size_t idx = 0; idx < 6; ++idx) {
+        EXPECT_EQ(sortedCounts[idx], mapped.at(idx).NumMatches());
+    }
+}
+
+TEST(CompareTest, NumMismatchesOk)
+{
+    // Mismatch counts per record: as delivered by the fixture, and after sorting.
+    const int fixtureCounts[] = { 0, 0, 2, 0, 0, 2 };
+    const int sortedCounts[]  = { 0, 0, 0, 0, 2, 2 };
+
+    auto mapped = tests::makeMappedRecords();
+
+    // sanity-check the fixture before relying on it
+    EXPECT_EQ(6, mapped.size());
+    for (size_t idx = 0; idx < 6; ++idx) {
+        EXPECT_EQ(fixtureCounts[idx], mapped.at(idx).NumMismatches());
+    }
+
+    // sort, then verify ascending counts
+    std::sort(mapped.begin(), mapped.end(), Compare::NumMismatches());
+    for (size_t idx = 0; idx < 6; ++idx) {
+        EXPECT_EQ(sortedCounts[idx], mapped.at(idx).NumMismatches());
+    }
+}
+
+TEST(CompareTest, QueryEndOk)
+{
+    // Query-end positions stored in the "qe" tag, listed in insertion order.
+    Position end30 = 30;
+    Position end20 = 20;
+    Position end40 = 40;
+    Position end10 = 10;
+
+    vector<BamRecord> sorted;
+    sorted.push_back(tests::makeRecordWithTag("qe", Tag(end30)));
+    sorted.push_back(tests::makeRecordWithTag("qe", Tag(end20)));
+    sorted.push_back(tests::makeRecordWithTag("qe", Tag(end40)));
+    sorted.push_back(tests::makeRecordWithTag("qe", Tag(end10)));
+
+    std::sort(sorted.begin(), sorted.end(), Compare::QueryEnd());
+
+    // Expected ascending order: 10, 20, 30, 40.
+    EXPECT_EQ(end10, sorted.at(0).QueryEnd());
+    EXPECT_EQ(end20, sorted.at(1).QueryEnd());
+    EXPECT_EQ(end30, sorted.at(2).QueryEnd());
+    EXPECT_EQ(end40, sorted.at(3).QueryEnd());
+}
+
+TEST(CompareTest, QueryStartOk)
+{
+    // Query-start positions stored in the "qs" tag, listed in insertion order.
+    Position start30 = 30;
+    Position start20 = 20;
+    Position start40 = 40;
+    Position start10 = 10;
+
+    vector<BamRecord> sorted;
+    sorted.push_back(tests::makeRecordWithTag("qs", Tag(start30)));
+    sorted.push_back(tests::makeRecordWithTag("qs", Tag(start20)));
+    sorted.push_back(tests::makeRecordWithTag("qs", Tag(start40)));
+    sorted.push_back(tests::makeRecordWithTag("qs", Tag(start10)));
+
+    std::sort(sorted.begin(), sorted.end(), Compare::QueryStart());
+
+    // Expected ascending order: 10, 20, 30, 40.
+    EXPECT_EQ(start10, sorted.at(0).QueryStart());
+    EXPECT_EQ(start20, sorted.at(1).QueryStart());
+    EXPECT_EQ(start30, sorted.at(2).QueryStart());
+    EXPECT_EQ(start40, sorted.at(3).QueryStart());
+}
+
+TEST(CompareTest, ReadGroupIdOk)
+{
+    ReadGroupInfo groupFoo{ "foo", "SUBREAD" };
+    ReadGroupInfo groupBar{ "bar", "SUBREAD" };
+    ReadGroupInfo groupC{ "c", "SUBREAD" };
+    ReadGroupInfo groupD{ "d", "SUBREAD" };
+
+    // All read groups are registered in the header shared by every record.
+    BamHeader header;
+    header.AddReadGroup(groupFoo)
+          .AddReadGroup(groupBar)
+          .AddReadGroup(groupC)
+          .AddReadGroup(groupD);
+
+    // Trailing comments give the read group ID string of each record.
+    BamRecord recC(header);
+    recC.ReadGroup(groupC);     // -> 99365356
+    BamRecord recBar(header);
+    recBar.ReadGroup(groupBar); // -> d9f305e4
+    BamRecord recD(header);
+    recD.ReadGroup(groupD);     // -> 54397cd6
+    BamRecord recFoo(header);
+    recFoo.ReadGroup(groupFoo); // -> a60ddc69
+
+    // IDs are compared as strings (lexical ordering), NOT as numbers.
+    vector<BamRecord> sorted{ recC, recBar, recD, recFoo };
+    std::sort(sorted.begin(), sorted.end(), Compare::ReadGroupId());
+
+    // Expected lexical order: 54397cd6, 99365356, a60ddc69, d9f305e4.
+    EXPECT_EQ(recD.ReadGroupId(),   sorted.at(0).ReadGroupId());
+    EXPECT_EQ(recC.ReadGroupId(),   sorted.at(1).ReadGroupId());
+    EXPECT_EQ(recFoo.ReadGroupId(), sorted.at(2).ReadGroupId());
+    EXPECT_EQ(recBar.ReadGroupId(), sorted.at(3).ReadGroupId());
+}
+
+TEST(CompareTest, ReadGroupNumericIdOk)
+{
+    ReadGroupInfo groupA{ "a", "SUBREAD" };
+    ReadGroupInfo groupB{ "b", "SUBREAD" };
+    ReadGroupInfo groupC{ "c", "SUBREAD" };
+    ReadGroupInfo groupD{ "d", "SUBREAD" };
+
+    // All read groups are registered in the header shared by every record.
+    BamHeader header;
+    header.AddReadGroup(groupA)
+          .AddReadGroup(groupB)
+          .AddReadGroup(groupC)
+          .AddReadGroup(groupD);
+
+    // Trailing comments give the numeric read group ID of each record.
+    BamRecord recC(header);
+    recC.ReadGroup(groupC); // -> -1724492970
+    BamRecord recB(header);
+    recB.ReadGroup(groupB); // -> 235381373
+    BamRecord recD(header);
+    recD.ReadGroup(groupD); // -> 1413053654
+    BamRecord recA(header);
+    recA.ReadGroup(groupA); // -> 1153643386
+
+    // IDs are compared as numbers here (numeric ordering).
+    vector<BamRecord> sorted{ recC, recB, recD, recA };
+    std::sort(sorted.begin(), sorted.end(), Compare::ReadGroupNumericId());
+
+    // Expected numeric order: -1724492970, 235381373, 1153643386, 1413053654.
+    EXPECT_EQ(recC.ReadGroupNumericId(), sorted.at(0).ReadGroupNumericId());
+    EXPECT_EQ(recB.ReadGroupNumericId(), sorted.at(1).ReadGroupNumericId());
+    EXPECT_EQ(recA.ReadGroupNumericId(), sorted.at(2).ReadGroupNumericId());
+    EXPECT_EQ(recD.ReadGroupNumericId(), sorted.at(3).ReadGroupNumericId());
+}
+
+TEST(CompareTest, ReadAccuracyOk)
+{
+    // Accuracy values stored in the "rq" tag, listed in insertion order.
+    Accuracy accuracy30 = 30;
+    Accuracy accuracy20 = 20;
+    Accuracy accuracy40 = 40;
+    Accuracy accuracy10 = 10;
+
+    vector<BamRecord> sorted;
+    sorted.push_back(tests::makeRecordWithTag("rq", Tag(accuracy30)));
+    sorted.push_back(tests::makeRecordWithTag("rq", Tag(accuracy20)));
+    sorted.push_back(tests::makeRecordWithTag("rq", Tag(accuracy40)));
+    sorted.push_back(tests::makeRecordWithTag("rq", Tag(accuracy10)));
+
+    std::sort(sorted.begin(), sorted.end(), Compare::ReadAccuracy());
+
+    // Expected order follows the original values: 10, 20, 30, 40.
+    EXPECT_EQ(accuracy10, sorted.at(0).ReadAccuracy());
+    EXPECT_EQ(accuracy20, sorted.at(1).ReadAccuracy());
+    EXPECT_EQ(accuracy30, sorted.at(2).ReadAccuracy());
+    EXPECT_EQ(accuracy40, sorted.at(3).ReadAccuracy());
+}
+
+TEST(CompareTest, ReferenceEndOk)
+{
+    // Reference-end positions per record: as delivered by the fixture, and after sorting.
+    const int fixtureEnds[] = { 110, 113, 111, 110, 113, 111 };
+    const int sortedEnds[]  = { 110, 110, 111, 111, 113, 113 };
+
+    auto mapped = tests::makeMappedRecords();
+
+    // sanity-check the fixture before relying on it
+    EXPECT_EQ(6, mapped.size());
+    for (size_t idx = 0; idx < 6; ++idx) {
+        EXPECT_EQ(fixtureEnds[idx], mapped.at(idx).ReferenceEnd());
+    }
+
+    // sort, then verify ascending positions
+    std::sort(mapped.begin(), mapped.end(), Compare::ReferenceEnd());
+    for (size_t idx = 0; idx < 6; ++idx) {
+        EXPECT_EQ(sortedEnds[idx], mapped.at(idx).ReferenceEnd());
+    }
+}
+
+TEST(CompareTest, ReferenceIdOk)
+{
+    // Reference IDs assigned out of order.
+    BamRecord ref30;
+    ref30.Impl().ReferenceId(30);
+    BamRecord ref20;
+    ref20.Impl().ReferenceId(20);
+    BamRecord ref40;
+    ref40.Impl().ReferenceId(40);
+    BamRecord ref10;
+    ref10.Impl().ReferenceId(10);
+
+    vector<BamRecord> sorted{ ref30, ref20, ref40, ref10 };
+    std::sort(sorted.begin(), sorted.end(), Compare::ReferenceId());
+
+    // Expected ascending order: 10, 20, 30, 40.
+    BamRecord* const ascending[] = { &ref10, &ref20, &ref30, &ref40 };
+    for (size_t idx = 0; idx < 4; ++idx) {
+        EXPECT_EQ(ascending[idx]->ReferenceId(), sorted.at(idx).ReferenceId());
+    }
+}
+
+TEST(CompareTest, ReferenceNameOk)
+{
+    SequenceInfo first{ "seq1" };
+    SequenceInfo second{ "seq2" };
+    SequenceInfo third{ "seq3" };
+    SequenceInfo fourth{ "seq4" };
+
+    // Trailing comments give the reference ID each sequence ends up with.
+    BamHeader header;
+    header.AddSequence(first)    // -> 0
+          .AddSequence(second)   // -> 1
+          .AddSequence(third)    // -> 2
+          .AddSequence(fourth);  // -> 3
+
+    // Mapped records pointing at the references out of order: seq3, seq2, seq4, seq1.
+    BamRecord onSeq3(header);
+    onSeq3.Impl().SetMapped(true);
+    onSeq3.Impl().ReferenceId(2);
+    BamRecord onSeq2(header);
+    onSeq2.Impl().SetMapped(true);
+    onSeq2.Impl().ReferenceId(1);
+    BamRecord onSeq4(header);
+    onSeq4.Impl().SetMapped(true);
+    onSeq4.Impl().ReferenceId(3);
+    BamRecord onSeq1(header);
+    onSeq1.Impl().SetMapped(true);
+    onSeq1.Impl().ReferenceId(0);
+
+    vector<BamRecord> sorted{ onSeq3, onSeq2, onSeq4, onSeq1 };
+    std::sort(sorted.begin(), sorted.end(), Compare::ReferenceName());
+
+    // Expected order of reference names: seq1, seq2, seq3, seq4.
+    EXPECT_EQ(first.Name(),  sorted.at(0).ReferenceName());
+    EXPECT_EQ(second.Name(), sorted.at(1).ReferenceName());
+    EXPECT_EQ(third.Name(),  sorted.at(2).ReferenceName());
+    EXPECT_EQ(fourth.Name(), sorted.at(3).ReferenceName());
+}
+
+TEST(CompareTest, ReferenceStartOk)
+{
+    // Positions assigned out of order; they are read back through ReferenceStart().
+    BamRecord pos30;
+    pos30.Impl().Position(30);
+    BamRecord pos20;
+    pos20.Impl().Position(20);
+    BamRecord pos40;
+    pos40.Impl().Position(40);
+    BamRecord pos10;
+    pos10.Impl().Position(10);
+
+    vector<BamRecord> sorted{ pos30, pos20, pos40, pos10 };
+    std::sort(sorted.begin(), sorted.end(), Compare::ReferenceStart());
+
+    // Expected ascending order: 10, 20, 30, 40.
+    BamRecord* const ascending[] = { &pos10, &pos20, &pos30, &pos40 };
+    for (size_t idx = 0; idx < 4; ++idx) {
+        EXPECT_EQ(ascending[idx]->ReferenceStart(), sorted.at(idx).ReferenceStart());
+    }
+}
+
+TEST(CompareTest, ZmwOk)
+{
+    // Values stored in the "zm" tag, listed in insertion order; read back via HoleNumber().
+    int32_t hole30 = 30;
+    int32_t hole20 = 20;
+    int32_t hole40 = 40;
+    int32_t hole10 = 10;
+
+    vector<BamRecord> sorted;
+    sorted.push_back(tests::makeRecordWithTag("zm", Tag(hole30)));
+    sorted.push_back(tests::makeRecordWithTag("zm", Tag(hole20)));
+    sorted.push_back(tests::makeRecordWithTag("zm", Tag(hole40)));
+    sorted.push_back(tests::makeRecordWithTag("zm", Tag(hole10)));
+
+    std::sort(sorted.begin(), sorted.end(), Compare::Zmw());
+
+    // Expected ascending order: 10, 20, 30, 40.
+    EXPECT_EQ(hole10, sorted.at(0).HoleNumber());
+    EXPECT_EQ(hole20, sorted.at(1).HoleNumber());
+    EXPECT_EQ(hole30, sorted.at(2).HoleNumber());
+    EXPECT_EQ(hole40, sorted.at(3).HoleNumber());
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/DataSet.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace tests {
+
+// Returns, by value, a DataSet whose name has been set to "foo".
+// Used by the move tests as a source of temporary DataSet objects.
+static inline
+DataSet CreateDataSet()
+{
+    DataSet named;
+    named.Name("foo");
+    return named;
+}
+
+} // namespace tests
+
+TEST(DataSetCoreTest, XmlNameParts)
+{
+    typedef boost::string_ref Ref;
+
+    // "prefix:local" splits at the colon; the qualified name is the full input.
+    internal::XmlName prefixed("ns:node_name");
+    EXPECT_EQ(Ref("ns"),           prefixed.Prefix());
+    EXPECT_EQ(Ref("node_name"),    prefixed.LocalName());
+    EXPECT_EQ(Ref("ns:node_name"), prefixed.QualifiedName());
+
+    // Without a colon there is no prefix and the whole input is the local name.
+    internal::XmlName unprefixed("node_name");
+    EXPECT_EQ(Ref(""),          unprefixed.Prefix());
+    EXPECT_EQ(Ref("node_name"), unprefixed.LocalName());
+    EXPECT_EQ(Ref("node_name"), unprefixed.QualifiedName());
+
+    // A leading colon does not start a prefix: the colon stays in the local name.
+    internal::XmlName colonFirst(":node_name");
+    EXPECT_EQ(Ref(""),           colonFirst.Prefix());
+    EXPECT_EQ(Ref(":node_name"), colonFirst.LocalName());
+    EXPECT_EQ(Ref(":node_name"), colonFirst.QualifiedName());
+}
+
+TEST(DataSetCoreTest, DefaultsOk)
+{
+    DataSet fresh;
+
+    // A default-constructed DataSet is of the generic type and reports version 3.0.1.
+    EXPECT_EQ(DataSet::GENERIC, fresh.Type());
+    EXPECT_EQ(string{"3.0.1"}, fresh.Version());
+
+    // Attributes that are populated on construction.
+    EXPECT_FALSE(fresh.CreatedAt().empty());
+    EXPECT_FALSE(fresh.MetaType().empty());
+    EXPECT_FALSE(fresh.TimeStampedName().empty());
+    EXPECT_FALSE(fresh.UniqueId().empty());
+    EXPECT_FALSE(fresh.Version().empty());
+
+    // The time-stamped name starts with the fixed "pacbio_dataset_" prefix.
+    EXPECT_EQ(0, fresh.TimeStampedName().find("pacbio_dataset_"));
+
+    // Attributes that stay empty until set explicitly.
+    EXPECT_TRUE(fresh.Format().empty());
+    EXPECT_TRUE(fresh.ModifiedAt().empty());
+    EXPECT_TRUE(fresh.Name().empty());
+    EXPECT_TRUE(fresh.ResourceId().empty());
+    EXPECT_TRUE(fresh.Tags().empty());
+
+    // No child elements to begin with.
+    EXPECT_EQ(0, fresh.ExternalResources().Size());
+    EXPECT_EQ(0, fresh.Filters().Size());
+    EXPECT_EQ(0, fresh.SubDataSets().Size());
+}
+
+TEST(DataSetCoreTest, TimeStampedNamesOk)
+{
+    // Each dataset type must produce a time-stamped name that begins (position 0)
+    // with "pacbio_dataset_" followed by its own lower-case type label and a dash.
+
+    DataSet generic;
+    EXPECT_EQ(0, generic.TimeStampedName().find("pacbio_dataset_dataset-"));
+
+    AlignmentSet alignments;
+    EXPECT_EQ(0, alignments.TimeStampedName().find("pacbio_dataset_alignmentset-"));
+
+    BarcodeSet barcodes;
+    EXPECT_EQ(0, barcodes.TimeStampedName().find("pacbio_dataset_barcodeset-"));
+
+    ContigSet contigs;
+    EXPECT_EQ(0, contigs.TimeStampedName().find("pacbio_dataset_contigset-"));
+
+    ConsensusAlignmentSet consensusAlignments;
+    EXPECT_EQ(0, consensusAlignments.TimeStampedName().find("pacbio_dataset_consensusalignmentset-"));
+
+    ConsensusReadSet consensusReads;
+    EXPECT_EQ(0, consensusReads.TimeStampedName().find("pacbio_dataset_consensusreadset-"));
+
+    HdfSubreadSet hdfSubreads;
+    EXPECT_EQ(0, hdfSubreads.TimeStampedName().find("pacbio_dataset_hdfsubreadset-"));
+
+    ReferenceSet references;
+    EXPECT_EQ(0, references.TimeStampedName().find("pacbio_dataset_referenceset-"));
+
+    SubreadSet subreads;
+    EXPECT_EQ(0, subreads.TimeStampedName().find("pacbio_dataset_subreadset-"));
+}
+
+TEST(DataSetCoreTest, BasicGettersSettersOk)
+{
+    DataSet ds;
+
+    // Set every basic attribute first ...
+    ds.CreatedAt("now");
+    ds.Format("format");
+    ds.MetaType("meta");
+    ds.ModifiedAt("later");
+    ds.Name("foo");
+    ds.ResourceId("path/to/file");
+    ds.Tags("tag tag");
+    ds.TimeStampedName("now:30");
+    ds.UniqueId("uuid");
+    ds.Version("0.0.0");
+
+    // ... then read them all back, so a setter clobbering another attribute is caught.
+    EXPECT_EQ(string{"now"},          ds.CreatedAt());
+    EXPECT_EQ(string{"format"},       ds.Format());
+    EXPECT_EQ(string{"meta"},         ds.MetaType());
+    EXPECT_EQ(string{"later"},        ds.ModifiedAt());
+    EXPECT_EQ(string{"foo"},          ds.Name());
+    EXPECT_EQ(string{"path/to/file"}, ds.ResourceId());
+    EXPECT_EQ(string{"tag tag"},      ds.Tags());
+    EXPECT_EQ(string{"now:30"},       ds.TimeStampedName());
+    EXPECT_EQ(string{"uuid"},         ds.UniqueId());
+    EXPECT_EQ(string{"0.0.0"},        ds.Version());
+}
+
+TEST(DataSetCoreTest, CopyOk)
+{
+    // Source object whose name should survive both kinds of copy.
+    DataSet original;
+    original.Name("foo");
+
+    // copy construction
+    DataSet viaCopyCtor(original);
+    EXPECT_EQ(string{"foo"}, viaCopyCtor.Name());
+
+    // copy assignment onto an already-constructed object
+    DataSet viaCopyAssign;
+    viaCopyAssign = original;
+    EXPECT_EQ(string{"foo"}, viaCopyAssign.Name());
+}
+
+TEST(DataSetCoreTest, MoveOk)
+{
+    // Both cases move from a *named* object. Wrapping the temporary returned by
+    // tests::CreateDataSet() in std::move (as was done before) is a redundant,
+    // pessimizing move that needed compiler-specific warning suppression, and it
+    // left an unused local behind. Moving from an lvalue exercises the same
+    // move constructor / move assignment operator without either problem.
+
+    // move ctor
+    DataSet ctorSource(tests::CreateDataSet());
+    DataSet d2(std::move(ctorSource));
+    EXPECT_EQ(string("foo"), d2.Name());
+
+    // move assignment
+    DataSet assignSource(tests::CreateDataSet());
+    DataSet d3;
+    d3 = std::move(assignSource);
+    EXPECT_EQ(string("foo"), d3.Name());
+}
+
+TEST(DataSetCoreTest, AddExternalResources)
+{
+    DataSet dataset;
+    EXPECT_EQ(0, dataset.ExternalResources().Size());
+
+    ExternalResource resource1("metatype", "id");
+    resource1.Name("file1");
+
+    ExternalResource resource2("metatype", "id2");
+    resource2.Name("file2");
+
+    dataset.ExternalResources().Add(resource1);
+    dataset.ExternalResources().Add(resource2);
+    EXPECT_EQ(2, dataset.ExternalResources().Size());
+
+    // disallow duplicates (checking on ResourceId)
+    ExternalResource duplicateResource("metatype", "id");
+    dataset.ExternalResources().Add(duplicateResource);
+
+    // Fatal check: the indexed accesses below are only valid with exactly two
+    // entries, so stop here instead of indexing out of range.
+    ASSERT_EQ(2, dataset.ExternalResources().Size());
+
+    // direct access
+    const ExternalResources& resources = dataset.ExternalResources();
+    EXPECT_EQ(string("file1"), resources[0].Name());
+    EXPECT_EQ(string("file2"), resources[1].Name());
+
+    // iterable (by const reference, so no element is copied per iteration)
+    size_t visited = 0;
+    for (const ExternalResource& r : resources) {
+        if (visited == 0)
+            EXPECT_EQ(string("file1"), r.Name());
+        else
+            EXPECT_EQ(string("file2"), r.Name());
+        ++visited;
+    }
+
+    // the loop must have seen every resource exactly once
+    EXPECT_EQ(2u, visited);
+}
+
+TEST(DataSetCoreTest, EditExternalResources)
+{
+    DataSet dataset;
+
+    // The same local object is added twice; it is renamed and given a new
+    // resource ID in between, so the dataset ends up with two distinct entries.
+    ExternalResource resource("metatype", "id");
+    resource.Name("file1");
+    dataset.ExternalResources().Add(resource);
+
+    resource.Name("file2").ResourceId("id2");
+    dataset.ExternalResources().Add(resource);
+
+    // Fatal check: entries [0] and [1] are accessed below, so do not continue
+    // (and index out of range) if the container does not hold two entries.
+    ASSERT_EQ(2, dataset.ExternalResources().Size());
+
+    // edit the first entry in place; the second must stay untouched
+    dataset.ExternalResources()[0].Name("some new name");
+    EXPECT_EQ(string("some new name"), dataset.ExternalResources()[0].Name());
+    EXPECT_EQ(string("file2"), dataset.ExternalResources()[1].Name());
+}
+
+TEST(DataSetCoreTest, NestedExternalResources)
+{
+    // An ExternalResource can itself hold child ExternalResources.
+    ExternalResource resource("metatype", "filename");
+    resource.ExternalResources().Add(ExternalResource("metatype.child", "filename.child"));
+    resource.ExternalResources().Add(ExternalResource("metatype.child2", "filename.child2"));
+
+    const ExternalResources& childResources = resource.ExternalResources();
+
+    // Fatal check: children [0] and [1] are accessed below, so do not continue
+    // (and index out of range) if both children were not stored.
+    ASSERT_EQ(2, childResources.Size());
+
+    // children keep the constructor arguments as MetaType / ResourceId, in insertion order
+    EXPECT_EQ(string("metatype.child"), childResources[0].MetaType());
+    EXPECT_EQ(string("metatype.child2"), childResources[1].MetaType());
+    EXPECT_EQ(string("filename.child"), childResources[0].ResourceId());
+    EXPECT_EQ(string("filename.child2"), childResources[1].ResourceId());
+}
+
+TEST(DataSetCoreTest, AddFilters)
+{
+    // Expected (name, value, operator) of every property, indexed [filter][property].
+    // Shared by the direct-access and the iteration checks so both verify the same data.
+    struct ExpectedProperty { const char* name; const char* value; const char* op; };
+    const ExpectedProperty expected[2][2] = {
+        { { "rq", "0.85", ">"  }, { "RNAME", "chr1", "==" } },
+        { { "rq", "0.50", ">=" }, { "RNAME", "chr2", "!=" } }
+    };
+
+    DataSet dataset;
+    EXPECT_EQ(0, dataset.Filters().Size());
+
+    Filter filter;
+    filter.Properties().Add(Property("rq", "0.85", ">"));
+    filter.Properties().Add(Property("RNAME", "chr1", "=="));
+    EXPECT_EQ(2, filter.Properties().Size());
+
+    Filter filter2;
+    filter2.Properties().Add(Property("rq", "0.50", ">="));
+    filter2.Properties().Add(Property("RNAME", "chr2", "!="));
+    EXPECT_EQ(2, filter2.Properties().Size());
+
+    dataset.Filters().Add(filter);
+    dataset.Filters().Add(filter2);
+
+    // Fatal checks: everything below indexes into these containers, so stop
+    // here rather than index out of range if anything is missing.
+    const Filters& filters = dataset.Filters();
+    ASSERT_EQ(2, filters.Size());
+    ASSERT_EQ(2, filters[0].Properties().Size());
+    ASSERT_EQ(2, filters[1].Properties().Size());
+
+    // direct access
+    for (size_t i = 0; i < 2; ++i) {
+        for (size_t j = 0; j < 2; ++j) {
+            const Property& p = filters[i].Properties()[j];
+            EXPECT_EQ(string(expected[i][j].name),  p.Name());
+            EXPECT_EQ(string(expected[i][j].value), p.Value());
+            EXPECT_EQ(string(expected[i][j].op),    p.Operator());
+        }
+    }
+
+    // iterable
+    size_t filterCount = 0;
+    for (const Filter& f : filters) {
+        // guard the lookup table against an iterator that yields extra elements
+        ASSERT_LT(filterCount, 2u);
+
+        size_t propertyCount = 0;
+        const Properties& properties = f.Properties();
+        for (const Property& p : properties) {
+            ASSERT_LT(propertyCount, 2u);
+            const ExpectedProperty& e = expected[filterCount][propertyCount];
+            EXPECT_EQ(string(e.name),  p.Name());
+            EXPECT_EQ(string(e.value), p.Value());
+            EXPECT_EQ(string(e.op),    p.Operator());
+            ++propertyCount;
+        }
+
+        // each filter must have yielded both of its properties
+        EXPECT_EQ(2u, propertyCount);
+        ++filterCount;
+    }
+
+    // and the outer loop must have visited both filters
+    EXPECT_EQ(2u, filterCount);
+}
+
+TEST(DataSetCoreTest, EditFilters)
+{
+    DataSet dataset;
+    EXPECT_EQ(0, dataset.Filters().Size());
+
+    Filter filter;
+    filter.Properties().Add(Property("rq", "0.85", ">"));
+    filter.Properties().Add(Property("RNAME", "chr1", "=="));
+    EXPECT_EQ(2, filter.Properties().Size());
+
+    Filter filter2;
+    filter2.Properties().Add(Property("rq", "0.50", ">="));
+    filter2.Properties().Add(Property("RNAME", "chr2", "!="));
+    EXPECT_EQ(2, filter2.Properties().Size());
+
+    dataset.Filters().Add(filter);
+    dataset.Filters().Add(filter2);
+
+    // Fatal checks: the code below indexes into both filters and both property
+    // lists (and takes a mutable reference into one), so stop here rather than
+    // index out of range if anything is missing.
+    ASSERT_EQ(2, dataset.Filters().Size());
+    ASSERT_EQ(2, dataset.Filters()[0].Properties().Size());
+    ASSERT_EQ(2, dataset.Filters()[1].Properties().Size());
+
+    // edit property in-place, through a reference obtained from the dataset
+    Property& p = dataset.Filters()[0].Properties()[0];
+    p.Name("someNewName");
+    p.Value("someNewValue");
+    p.Operator("==");
+
+    // the edited property reflects the new values when fetched again ...
+    const Property& p0 = dataset.Filters()[0].Properties()[0];
+    EXPECT_EQ(string("someNewName"), p0.Name());
+    EXPECT_EQ(string("someNewValue"), p0.Value());
+    EXPECT_EQ(string("=="), p0.Operator());
+
+    // ... and the remaining three properties are unchanged
+    const Property& p1 = dataset.Filters()[0].Properties()[1];
+    EXPECT_EQ(string("RNAME"), p1.Name());
+    EXPECT_EQ(string("chr1"), p1.Value());
+    EXPECT_EQ(string("=="), p1.Operator());
+
+    const Property& p2 = dataset.Filters()[1].Properties()[0];
+    EXPECT_EQ(string("rq"), p2.Name());
+    EXPECT_EQ(string("0.50"), p2.Value());
+    EXPECT_EQ(string(">="), p2.Operator());
+
+    const Property& p3 = dataset.Filters()[1].Properties()[1];
+    EXPECT_EQ(string("RNAME"), p3.Name());
+    EXPECT_EQ(string("chr2"), p3.Value());
+    EXPECT_EQ(string("!="), p3.Operator());
+}
+
+TEST(DataSetCoreTest, AddSubDataSets)
+{
+    DataSet dataset;
+    EXPECT_EQ(0, dataset.SubDataSets().Size());
+
+    // Two named sub-datasets, appended in order.
+    DataSetBase first;
+    first.Name("subset_1");
+
+    DataSetBase second;
+    second.Name("subset_2");
+
+    dataset.SubDataSets().Add(first);
+    dataset.SubDataSets().Add(second);
+    EXPECT_EQ(2, dataset.SubDataSets().Size());
+
+    // Indexed access.
+    const SubDataSets& children = dataset.SubDataSets();
+    EXPECT_EQ(string("subset_1"), children[0].Name());
+    EXPECT_EQ(string("subset_2"), children[1].Name());
+
+    // Range-based iteration: the element at position 0 must be the first
+    // subset, every later element is compared against the second.
+    size_t position = 0;
+    for (const DataSetBase& child : children) {
+        const string expectedName = (position == 0) ? "subset_1" : "subset_2";
+        EXPECT_EQ(expectedName, child.Name());
+        ++position;
+    }
+}
+
+TEST(DataSetCoreTest, EditSubDataSets)
+{
+    DataSet dataset;
+    EXPECT_EQ(0, dataset.SubDataSets().Size());
+
+    // Two named sub-datasets, appended in order.
+    DataSetBase first;
+    first.Name("subset_1");
+
+    DataSetBase second;
+    second.Name("subset_2");
+
+    dataset.SubDataSets().Add(first);
+    dataset.SubDataSets().Add(second);
+    EXPECT_EQ(2, dataset.SubDataSets().Size());
+
+    // Rename the first sub-dataset in place, via the dataset.
+    dataset.SubDataSets()[0].Name("subset_1_edited");
+
+    // Indexed access reflects the rename; the second entry is untouched.
+    const SubDataSets& children = dataset.SubDataSets();
+    EXPECT_EQ(string("subset_1_edited"), children[0].Name());
+    EXPECT_EQ(string("subset_2"), children[1].Name());
+
+    // Range-based iteration sees the same names.
+    size_t position = 0;
+    for (const DataSetBase& child : children) {
+        const string expectedName = (position == 0) ? "subset_1_edited" : "subset_2";
+        EXPECT_EQ(expectedName, child.Name());
+        ++position;
+    }
+}
+
+TEST(DataSetCoreTest, RemoveExternalResources)
+{
+    // Removing one of two external resources must leave only the other one,
+    // reachable both by index and by iteration.
+    DataSet dataset;
+    EXPECT_EQ(0, dataset.ExternalResources().Size());
+
+    ExternalResource resource1("metatype", "id");
+    resource1.Name("file1");
+
+    ExternalResource resource2("metatype", "id2");
+    resource2.Name("file2");
+
+    dataset.ExternalResources().Add(resource1);
+    dataset.ExternalResources().Add(resource2);
+    EXPECT_EQ(2, dataset.ExternalResources().Size());
+
+    // remove
+    dataset.ExternalResources().Remove(resource1);
+    EXPECT_EQ(1, dataset.ExternalResources().Size());
+
+    // direct access
+    const ExternalResources& resources = dataset.ExternalResources();
+    EXPECT_EQ(string("file2"), resources[0].Name());
+
+    // iterable: bind each element by const reference so the loop does not
+    // copy resources (the sibling tests iterate the same way)
+    size_t i = 0;
+    for (const auto& r : resources) {
+        if (i == 0)
+            EXPECT_EQ(string("file2"), r.Name());
+        ++i;
+    }
+}
+
+TEST(DataSetCoreTest, RemoveFilters)
+{
+    DataSet dataset;
+    EXPECT_EQ(0, dataset.Filters().Size());
+
+    // Two filters with two properties each.
+    Filter chr1Filter;
+    chr1Filter.Properties().Add(Property("rq", "0.85", ">"));
+    chr1Filter.Properties().Add(Property("RNAME", "chr1", "=="));
+    EXPECT_EQ(2, chr1Filter.Properties().Size());
+
+    Filter chr2Filter;
+    chr2Filter.Properties().Add(Property("rq", "0.50", ">="));
+    chr2Filter.Properties().Add(Property("RNAME", "chr2", "!="));
+    EXPECT_EQ(2, chr2Filter.Properties().Size());
+
+    dataset.Filters().Add(chr1Filter);
+    dataset.Filters().Add(chr2Filter);
+    EXPECT_EQ(2, dataset.Filters().Size());
+
+    // Drop the first filter; exactly one must remain.
+    dataset.Filters().Remove(chr1Filter);
+    EXPECT_EQ(1, dataset.Filters().Size());
+
+    // The surviving filter still carries both of its properties.
+    const Filters& remaining = dataset.Filters();
+    EXPECT_EQ(2, remaining[0].Properties().Size());
+}
+
+TEST(DataSetCoreTest, RemoveSubDataSets)
+{
+    DataSet dataset;
+    EXPECT_EQ(0, dataset.SubDataSets().Size());
+
+    // Two named sub-datasets, appended in order.
+    DataSetBase first;
+    first.Name("subset_1");
+
+    DataSetBase second;
+    second.Name("subset_2");
+
+    dataset.SubDataSets().Add(first);
+    dataset.SubDataSets().Add(second);
+    EXPECT_EQ(2, dataset.SubDataSets().Size());
+
+    // Removing the second sub-dataset leaves a single entry.
+    dataset.SubDataSets().Remove(second);
+    EXPECT_EQ(1, dataset.SubDataSets().Size());
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public // test-only: every later `private` token, including those in the headers included after this point, is rewritten to `public`
+#endif
+
+#include "TestData.h"
+#include "../src/FileUtils.h"
+#include <gtest/gtest.h>
+#include <pbbam/DataSet.h>
+#include <pbbam/internal/DataSetElement.h>
+#include <fstream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+#include <unistd.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// BAM inputs: a single aligned BAM file, and a generated file-of-filenames
+// (FOFN) listing several BAM files.
+const string alignedBamFn{tests::Data_Dir + "/aligned.bam"};
+const string bamGroupFofn{tests::Generated_Dir + "/group.fofn"};
+
+// DataSet XML fixtures, all located under <Data_Dir>/dataset.
+const string ali1XmlFn{tests::Data_Dir + "/dataset/ali1.xml"};
+const string ali2XmlFn{tests::Data_Dir + "/dataset/ali2.xml"};
+const string ali3XmlFn{tests::Data_Dir + "/dataset/ali3.xml"};
+const string ali4XmlFn{tests::Data_Dir + "/dataset/ali4.xml"};
+const string mappingStaggeredXmlFn{tests::Data_Dir + "/dataset/bam_mapping_staggered.xml"};
+const string barcodeXmlFn{tests::Data_Dir + "/dataset/barcode.dataset.xml"};
+const string ccsReadXmlFn{tests::Data_Dir + "/dataset/ccsread.dataset.xml"};
+const string lambdaContigsXmlFn{tests::Data_Dir + "/dataset/lambda_contigs.xml"};
+const string pbalchemyXmlFn{tests::Data_Dir + "/dataset/pbalchemy10kbp.xml"};
+const string referenceXmlFn{tests::Data_Dir + "/dataset/reference.dataset.xml"};
+const string subread1XmlFn{tests::Data_Dir + "/dataset/subread_dataset1.xml"};
+const string subread2XmlFn{tests::Data_Dir + "/dataset/subread_dataset2.xml"};
+const string subread3XmlFn{tests::Data_Dir + "/dataset/subread_dataset3.xml"};
+const string transformedXmlFn{tests::Data_Dir + "/dataset/transformed_rs_subread_dataset.xml"};
+
+// File-local verification helpers, defined after the TEST bodies that call
+// them. Each one checks a single XML input.
+static void TestFromXmlString();
+static void TestAli1Xml();
+static void TestAli2Xml();
+static void TestAli3Xml();
+static void TestAli4Xml();
+static void TestMappingStaggeredXml();
+static void TestBarcodeXml();
+static void TestCcsReadXml();
+static void TestLambdaContigsXml();
+static void TestPbalchemyXml();
+static void TestReferenceXml();
+static void TestSubread1Xml();
+static void TestSubread2Xml();
+static void TestSubread3Xml();
+static void TestTransformedXml();
+
+// Changes the process working directory to `dir`. If chdir() does not return
+// 0, ASSERT_EQ records a fatal gtest failure and returns from this helper.
+static inline void changeCurrentDirectory(const std::string& dir)
+{
+    ASSERT_EQ(0, chdir(dir.c_str()));
+}
+
+TEST(DataSetIOTest, FromBamFilename)
+{
+    // A DataSet built from one BAM path should list that path as its only
+    // external resource.
+    DataSet dataset(alignedBamFn);
+
+    // Fatal check: element [0] is read right below, so stop here when the
+    // resource count is not what we expect (the XML helpers in this file
+    // guard their indexing the same way).
+    ASSERT_EQ(1, dataset.ExternalResources().Size());
+    const ExternalResource& bamRef = dataset.ExternalResources()[0];
+
+    EXPECT_EQ(alignedBamFn, bamRef.ResourceId());
+}
+
+TEST(DataSetIOTest, FromBamFilenames)
+{
+    // Read the generated FOFN by hand and hand the resulting list of paths
+    // to the DataSet constructor.
+    std::ifstream fofn(bamGroupFofn);
+    // Fail with a clear message when the fixture is missing, instead of a
+    // resource-count mismatch further down.
+    ASSERT_TRUE(fofn.is_open()) << "could not open " << bamGroupFofn;
+
+    std::vector<std::string> files;
+    std::string file;
+    while (std::getline(fofn, file)) {
+        // skip blank lines
+        if (!file.empty())
+            files.emplace_back(file);
+    }
+
+    DataSet dataset(files);
+    EXPECT_EQ(3, dataset.ExternalResources().Size());
+}
+
+TEST(DataSetIOTest, FromBamFileObject)
+{
+    // Open the BAM first, then build the DataSet from the filename the
+    // BamFile object reports.
+    BamFile bamFile(alignedBamFn);
+    DataSet dataset(bamFile.Filename());
+
+    // Fatal check: element [0] is read right below, so stop here when the
+    // resource count is not what we expect.
+    ASSERT_EQ(1, dataset.ExternalResources().Size());
+    const ExternalResource& bamRef = dataset.ExternalResources()[0];
+
+    EXPECT_EQ(alignedBamFn, bamRef.ResourceId());
+}
+
+TEST(DataSetIOTest, FromFofn)
+{
+    // Loading the generated FOFN directly is expected to yield three
+    // external resources.
+    DataSet dataset(bamGroupFofn);
+    const auto resourceCount = dataset.ExternalResources().Size();
+    EXPECT_EQ(3, resourceCount);
+}
+
+TEST(DataSetIOTest, FromXml)
+{
+    // The helper parses an in-memory XML document and performs the
+    // field-level checks itself; here we only require that it does not throw.
+    EXPECT_NO_THROW(TestFromXmlString());
+}
+
+TEST(DataSetIOTest, FromXmlFile)
+{
+    // Every helper loads one XML fixture and checks its contents; none of
+    // them may throw. Calls are grouped by fixture family.
+
+    // ali*.xml and the staggered-mapping fixture
+    EXPECT_NO_THROW(TestAli1Xml());
+    EXPECT_NO_THROW(TestAli2Xml());
+    EXPECT_NO_THROW(TestAli3Xml());
+    EXPECT_NO_THROW(TestAli4Xml());
+    EXPECT_NO_THROW(TestMappingStaggeredXml());
+
+    // barcode, CCS read, contig, pbalchemy and reference fixtures
+    EXPECT_NO_THROW(TestBarcodeXml());
+    EXPECT_NO_THROW(TestCcsReadXml());
+    EXPECT_NO_THROW(TestLambdaContigsXml());
+    EXPECT_NO_THROW(TestPbalchemyXml());
+    EXPECT_NO_THROW(TestReferenceXml());
+
+    // subread fixtures and the transformed subread fixture
+    EXPECT_NO_THROW(TestSubread1Xml());
+    EXPECT_NO_THROW(TestSubread2Xml());
+    EXPECT_NO_THROW(TestSubread3Xml());
+    EXPECT_NO_THROW(TestTransformedXml());
+}
+
+TEST(DataSetIOTest, ThrowsOnNonexistentFofnFile)
+{
+    // Constructing from a FOFN path that does not exist must raise
+    // std::runtime_error.
+    const string missingFofn("does/not/exist.fofn");
+    EXPECT_THROW(DataSet{missingFofn}, std::runtime_error);
+}
+
+TEST(DataSetIOTest, ThrowsOnNonexistentXmlFile)
+{
+    // Constructing from an XML path that does not exist must raise
+    // std::runtime_error.
+    const string missingXml("does/not/exist.xml");
+    EXPECT_THROW(DataSet{missingXml}, std::runtime_error);
+}
+
+TEST(DataSetIOTest, ToXml) // builds an AlignmentSet in memory, serializes it, and compares the text with a fixed expected document
+{
+ // top-level data: attributes set directly on the AlignmentSet root element
+ DataSet dataset(DataSet::ALIGNMENT);
+ dataset.CreatedAt("2015-01-27T09:00:01");
+ dataset.MetaType("PacBio.DataSet.AlignmentSet");
+ dataset.Name("DataSet_AlignmentSet");
+ dataset.Tags("barcode moreTags mapping mytags");
+ dataset.TimeStampedName("my_tsn");
+ dataset.UniqueId("b095d0a3-94b8-4918-b3af-a3f81bbe519c");
+ dataset.Attribute("xmlns", "http://pacificbiosciences.com/PacBioDatasets.xsd")
+ .Attribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
+ .Attribute("xsi:schemaLocation", "http://pacificbiosciences.com/PacBioDatasets.xsd");
+
+ // external resources: two BAM resources, each with one PacBio index entry attached before being added to the dataset
+ ExternalResource resource1("AlignmentFile.AlignmentBamFile", "file:/mnt/path/to/alignments2.bam");
+ resource1.Name("Third Alignments BAM");
+ resource1.Description("Points to an example Alignments BAM file.");
+ resource1.Tags("Example");
+ resource1.TimeStampedName("my_tsn");
+ resource1.UniqueId("my_uuid");
+ FileIndex pbi1("PacBio.Index.PacBioIndex", "file:/mnt/path/to/alignments2.pbi");
+ pbi1.TimeStampedName("my_tsn");
+ pbi1.UniqueId("my_uuid");
+ resource1.FileIndices().Add(pbi1);
+ dataset.ExternalResources().Add(resource1);
+
+ ExternalResource resource2("AlignmentFile.AlignmentBamFile", "file:./alignments3.bam");
+ resource2.Name("Fourth Alignments BAM");
+ resource2.Description("Points to another example Alignments BAM file, by relative path.");
+ resource2.Tags("Example");
+ resource2.TimeStampedName("my_tsn");
+ resource2.UniqueId("my_uuid");
+ FileIndex pbi2("PacBio.Index.PacBioIndex", "file:/mnt/path/to/alignments3.pbi");
+ pbi2.TimeStampedName("my_tsn");
+ pbi2.UniqueId("my_uuid");
+
+ resource2.FileIndices().Add(pbi2);
+ dataset.ExternalResources().Add(resource2);
+
+ // sub-datasets with filters: each sub-dataset gets one filter holding a single property
+ DataSetBase subDataSet1;
+ subDataSet1.Name("HighQuality Read Alignments");
+ subDataSet1.TimeStampedName("my_tsn");
+ subDataSet1.UniqueId("ab95d0a3-94b8-4918-b3af-a3f81bbe519c");
+ Filter filter1;
+ filter1.Properties().Add(Property("rq", "0.85", ">"));
+ subDataSet1.Filters().Add(filter1);
+ dataset.SubDataSets().Add(subDataSet1);
+
+ DataSetBase subDataSet2;
+ subDataSet2.Name("Alignments to chromosome 1");
+ subDataSet2.TimeStampedName("my_tsn");
+ subDataSet2.UniqueId("ac95d0a3-94b8-4918-b3af-a3f81bbe519c");
+ Filter filter2;
+ filter2.Properties().Add(Property("RNAME", "chr1", "=="));
+ subDataSet2.Filters().Add(filter2);
+ dataset.SubDataSets().Add(subDataSet2);
+
+ // write dataset: the expected text is compared verbatim, so attribute order, tabs and newlines all matter
+ const string expectedXml =
+ "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
+ "<pbds:AlignmentSet "
+ "CreatedAt=\"2015-01-27T09:00:01\" "
+ "MetaType=\"PacBio.DataSet.AlignmentSet\" "
+ "Name=\"DataSet_AlignmentSet\" "
+ "Tags=\"barcode moreTags mapping mytags\" "
+ "TimeStampedName=\"my_tsn\" "
+ "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"3.0.1\" " // Version is never set by this test, yet it is expected in the output
+ "xmlns=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
+ "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
+ "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
+ "xmlns:pbbase=\"http://pacificbiosciences.com/PacBioBaseDataModel.xsd\" " // the pbbase/pbds namespace attributes are likewise not set above
+ "xmlns:pbds=\"http://pacificbiosciences.com/PacBioDatasets.xsd\">\n"
+ "\t<pbbase:ExternalResources>\n"
+ "\t\t<pbbase:ExternalResource "
+ "Description=\"Points to an example Alignments BAM file.\" "
+ "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+ "Name=\"Third Alignments BAM\" "
+ "ResourceId=\"file:/mnt/path/to/alignments2.bam\" "
+ "Tags=\"Example\" "
+ "TimeStampedName=\"my_tsn\" "
+ "UniqueId=\"my_uuid\" Version=\"3.0.1\">\n"
+ "\t\t\t<pbbase:FileIndices>\n"
+ "\t\t\t\t<pbbase:FileIndex "
+ "MetaType=\"PacBio.Index.PacBioIndex\" "
+ "ResourceId=\"file:/mnt/path/to/alignments2.pbi\" "
+ "TimeStampedName=\"my_tsn\" "
+ "UniqueId=\"my_uuid\" Version=\"3.0.1\" />\n"
+ "\t\t\t</pbbase:FileIndices>\n"
+ "\t\t</pbbase:ExternalResource>\n"
+ "\t\t<pbbase:ExternalResource "
+ "Description=\"Points to another example Alignments BAM file, by relative path.\" "
+ "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+ "Name=\"Fourth Alignments BAM\" "
+ "ResourceId=\"file:./alignments3.bam\" "
+ "Tags=\"Example\" "
+ "TimeStampedName=\"my_tsn\" "
+ "UniqueId=\"my_uuid\" Version=\"3.0.1\">\n"
+ "\t\t\t<pbbase:FileIndices>\n"
+ "\t\t\t\t<pbbase:FileIndex "
+ "MetaType=\"PacBio.Index.PacBioIndex\" "
+ "ResourceId=\"file:/mnt/path/to/alignments3.pbi\" "
+ "TimeStampedName=\"my_tsn\" "
+ "UniqueId=\"my_uuid\" Version=\"3.0.1\" />\n"
+ "\t\t\t</pbbase:FileIndices>\n"
+ "\t\t</pbbase:ExternalResource>\n"
+ "\t</pbbase:ExternalResources>\n"
+ "\t<pbds:DataSets>\n"
+ "\t\t<pbds:DataSet "
+ "MetaType=\"PacBio.DataSet.DataSet\" " // sub-dataset MetaType is not set by this test either
+ "Name=\"HighQuality Read Alignments\" "
+ "TimeStampedName=\"my_tsn\" "
+ "UniqueId=\"ab95d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+ "Version=\"3.0.1\">\n"
+ "\t\t\t<pbds:Filters>\n"
+ "\t\t\t\t<pbds:Filter>\n"
+ "\t\t\t\t\t<pbbase:Properties>\n"
+ "\t\t\t\t\t\t<pbbase:Property Name=\"rq\" Operator=\">\" Value=\"0.85\" />\n"
+ "\t\t\t\t\t</pbbase:Properties>\n"
+ "\t\t\t\t</pbds:Filter>\n"
+ "\t\t\t</pbds:Filters>\n"
+ "\t\t</pbds:DataSet>\n"
+ "\t\t<pbds:DataSet "
+ "MetaType=\"PacBio.DataSet.DataSet\" "
+ "Name=\"Alignments to chromosome 1\" "
+ "TimeStampedName=\"my_tsn\" "
+ "UniqueId=\"ac95d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+ "Version=\"3.0.1\">\n"
+ "\t\t\t<pbds:Filters>\n"
+ "\t\t\t\t<pbds:Filter>\n"
+ "\t\t\t\t\t<pbbase:Properties>\n"
+ "\t\t\t\t\t\t<pbbase:Property Name=\"RNAME\" Operator=\"==\" Value=\"chr1\" />\n"
+ "\t\t\t\t\t</pbbase:Properties>\n"
+ "\t\t\t\t</pbds:Filter>\n"
+ "\t\t\t</pbds:Filters>\n"
+ "\t\t</pbds:DataSet>\n"
+ "\t</pbds:DataSets>\n"
+ "</pbds:AlignmentSet>\n";
+
+ stringstream s; // in-memory sink for the serialized document
+ dataset.SaveToStream(s);
+ EXPECT_EQ(expectedXml, s.str());
+}
+
+// Parses a fixed AlignmentSet XML document from memory via DataSet::FromXml
+// and checks the resulting attributes, external resources (with their file
+// indices) and filtered sub-datasets.
+//
+// Container sizes are checked with ASSERT_EQ so that a short or empty
+// container ends this helper before any element is indexed, matching the
+// TestAli*Xml helpers in this file.
+static void TestFromXmlString(void)
+{
+    const string inputXml =
+        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
+        "<pbds:AlignmentSet "
+            "CreatedAt=\"2015-01-27T09:00:01\" "
+            "MetaType=\"PacBio.DataSet.AlignmentSet\" "
+            "Name=\"DataSet_AlignmentSet\" "
+            "Tags=\"barcode moreTags mapping mytags\" "
+            "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+            "Version=\"2.3.0\" "
+            "xmlns=\"http://pacificbiosciences.com/PacBioDataModel.xsd\" "
+            "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
+            "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDataModel.xsd\">\n"
+        "\t<pbbase:ExternalResources>\n"
+        "\t\t<pbbase:ExternalResource "
+                "Description=\"Points to an example Alignments BAM file.\" "
+                "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+                "Name=\"Third Alignments BAM\" "
+                "ResourceId=\"file:/mnt/path/to/alignments2.bam\" "
+                "Tags=\"Example\">\n"
+        "\t\t\t<pbbase:FileIndices>\n"
+        "\t\t\t\t<pbbase:FileIndex "
+                "MetaType=\"PacBio.Index.PacBioIndex\" "
+                "ResourceId=\"file:/mnt/path/to/alignments2.pbi\" />\n"
+        "\t\t\t</pbbase:FileIndices>\n"
+        "\t\t</pbbase:ExternalResource>\n"
+        "\t\t<pbbase:ExternalResource "
+                "Description=\"Points to another example Alignments BAM file, by relative path.\" "
+                "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+                "Name=\"Fourth Alignments BAM\" "
+                "ResourceId=\"file:./alignments3.bam\" "
+                "Tags=\"Example\">\n"
+        "\t\t\t<pbbase:FileIndices>\n"
+        "\t\t\t\t<pbbase:FileIndex "
+                "MetaType=\"PacBio.Index.PacBioIndex\" "
+                "ResourceId=\"file:/mnt/path/to/alignments3.pbi\" />\n"
+        "\t\t\t</pbbase:FileIndices>\n"
+        "\t\t</pbbase:ExternalResource>\n"
+        "\t</pbbase:ExternalResources>\n"
+        "\t<pbds:DataSets>\n"
+        "\t\t<pbds:DataSet "
+                "Name=\"HighQuality Read Alignments\" "
+                "UniqueId=\"ab95d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+                "Version=\"2.3.0\">\n"
+        "\t\t\t<pbds:Filters>\n"
+        "\t\t\t\t<pbds:Filter>\n"
+        "\t\t\t\t\t<pbbase:Properties>\n"
+        "\t\t\t\t\t\t<pbbase:Property Name=\"rq\" Operator=\">\" Value=\"0.85\" />\n"
+        "\t\t\t\t\t</pbbase:Properties>\n"
+        "\t\t\t\t</pbds:Filter>\n"
+        "\t\t\t</pbds:Filters>\n"
+        "\t\t</pbds:DataSet>\n"
+        "\t\t<pbds:DataSet "
+                "Name=\"Alignments to chromosome 1\" "
+                "UniqueId=\"ac95d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+                "Version=\"2.3.0\">\n"
+        "\t\t\t<pbds:Filters>\n"
+        "\t\t\t\t<pbds:Filter>\n"
+        "\t\t\t\t\t<pbbase:Properties>\n"
+        "\t\t\t\t\t\t<pbbase:Property Name=\"RNAME\" Operator=\"==\" Value=\"chr1\" />\n"
+        "\t\t\t\t\t</pbbase:Properties>\n"
+        "\t\t\t\t</pbds:Filter>\n"
+        "\t\t\t</pbds:Filters>\n"
+        "\t\t</pbds:DataSet>\n"
+        "\t</pbds:DataSets>\n"
+        "</pbds:AlignmentSet>\n";
+
+    const DataSet dataset = DataSet::FromXml(inputXml);
+
+    // root element attributes
+    EXPECT_EQ(DataSet::ALIGNMENT, dataset.Type());
+    EXPECT_EQ("2015-01-27T09:00:01", dataset.CreatedAt());
+    EXPECT_EQ("PacBio.DataSet.AlignmentSet", dataset.MetaType());
+    EXPECT_EQ("DataSet_AlignmentSet", dataset.Name());
+    EXPECT_EQ("barcode moreTags mapping mytags", dataset.Tags());
+    EXPECT_EQ("b095d0a3-94b8-4918-b3af-a3f81bbe519c", dataset.UniqueId());
+    EXPECT_EQ("2.3.0", dataset.Version());
+    EXPECT_EQ("http://pacificbiosciences.com/PacBioDataModel.xsd", dataset.Attribute("xmlns"));
+    EXPECT_EQ("http://www.w3.org/2001/XMLSchema-instance", dataset.Attribute("xmlns:xsi"));
+
+    // external resources; sizes are fatal checks because elements are
+    // indexed immediately afterwards
+    const ExternalResources& resources = dataset.ExternalResources();
+    ASSERT_EQ(2, resources.Size());
+
+    const ExternalResource& resource1 = resources[0];
+    EXPECT_EQ("Third Alignments BAM", resource1.Name());
+    EXPECT_EQ("Points to an example Alignments BAM file.", resource1.Description());
+    EXPECT_EQ("AlignmentFile.AlignmentBamFile", resource1.MetaType());
+    EXPECT_EQ("file:/mnt/path/to/alignments2.bam", resource1.ResourceId());
+    EXPECT_EQ("Example", resource1.Tags());
+    const FileIndices& fileIndices1 = resource1.FileIndices();
+    ASSERT_EQ(1, fileIndices1.Size());
+    const FileIndex& pbi1 = fileIndices1[0];
+    EXPECT_EQ("PacBio.Index.PacBioIndex", pbi1.MetaType());
+    EXPECT_EQ("file:/mnt/path/to/alignments2.pbi", pbi1.ResourceId());
+
+    const ExternalResource& resource2 = resources[1];
+    EXPECT_EQ("Fourth Alignments BAM", resource2.Name());
+    EXPECT_EQ("Points to another example Alignments BAM file, by relative path.", resource2.Description());
+    EXPECT_EQ("AlignmentFile.AlignmentBamFile", resource2.MetaType());
+    EXPECT_EQ("file:./alignments3.bam", resource2.ResourceId());
+    EXPECT_EQ("Example", resource2.Tags());
+    const FileIndices& fileIndices2 = resource2.FileIndices();
+    ASSERT_EQ(1, fileIndices2.Size());
+    const FileIndex& pbi2 = fileIndices2[0];
+    EXPECT_EQ("PacBio.Index.PacBioIndex", pbi2.MetaType());
+    EXPECT_EQ("file:/mnt/path/to/alignments3.pbi", pbi2.ResourceId());
+
+    // sub-datasets, each with one single-property filter
+    const SubDataSets& subDatasets = dataset.SubDataSets();
+    ASSERT_EQ(2, subDatasets.Size());
+
+    const DataSetBase& sub1 = subDatasets[0];
+    EXPECT_EQ("HighQuality Read Alignments", sub1.Name());
+    EXPECT_EQ("ab95d0a3-94b8-4918-b3af-a3f81bbe519c", sub1.UniqueId());
+    EXPECT_EQ("2.3.0", sub1.Version());
+    const Filters& sub1Filters = sub1.Filters();
+    ASSERT_EQ(1, sub1Filters.Size());
+    const Filter& sub1Filter = sub1Filters[0];
+    ASSERT_EQ(1, sub1Filter.Properties().Size());
+    const Property& property1 = sub1Filter.Properties()[0];
+    EXPECT_EQ("rq", property1.Name());
+    EXPECT_EQ(">", property1.Operator());
+    EXPECT_EQ("0.85", property1.Value());
+
+    const DataSetBase& sub2 = subDatasets[1];
+    EXPECT_EQ("Alignments to chromosome 1", sub2.Name());
+    EXPECT_EQ("ac95d0a3-94b8-4918-b3af-a3f81bbe519c", sub2.UniqueId());
+    EXPECT_EQ("2.3.0", sub2.Version());
+    const Filters& sub2Filters = sub2.Filters();
+    ASSERT_EQ(1, sub2Filters.Size());
+    const Filter& sub2Filter = sub2Filters[0];
+    ASSERT_EQ(1, sub2Filter.Properties().Size());
+    const Property& property2 = sub2Filter.Properties()[0];
+    EXPECT_EQ("RNAME", property2.Name());
+    EXPECT_EQ("==", property2.Operator());
+    EXPECT_EQ("chr1", property2.Value());
+}
+
+// Loads ali1.xml and checks the root attributes, both external resources
+// (each with one file index) and both filtered sub-datasets.
+static void TestAli1Xml(void)
+{
+    // Per-resource expectations; MetaType and Tags are the same for both.
+    struct ExpectedResource
+    {
+        const char* name;
+        const char* description;
+        const char* resourceId;
+        const char* indexId;
+    };
+    // Per-sub-dataset expectations, including its single filter property.
+    struct ExpectedSubDataSet
+    {
+        const char* name;
+        const char* uniqueId;
+        const char* propertyName;
+        const char* propertyValue;
+        const char* propertyOperator;
+    };
+
+    const ExpectedResource expectedResources[2] = {
+        { "First Alignments BAM",
+          "Points to an example Alignments BAM file.",
+          "file:///mnt/path/to/alignments0.bam",
+          "file:///mnt/path/to/alignments0.pbi" },
+        { "Second Alignments BAM",
+          "Points to another example Alignments BAM file, by relative path.",
+          "file:./alignments1.bam",
+          "file:///mnt/path/to/alignments1.pbi" }
+    };
+    const ExpectedSubDataSet expectedSubDataSets[2] = {
+        { "HighQuality Read Alignments", "ab95d0a3-94b8-4918-b3af-a3f81bbe519c", "rq", "0.85", ">" },
+        { "Alignments to chromosome 1", "ac95d0a3-94b8-4918-b3af-a3f81bbe519c", "RNAME", "chr1", "==" }
+    };
+
+    const DataSet dataset(ali1XmlFn);
+
+    // root element
+    EXPECT_EQ(DataSet::ALIGNMENT, dataset.Type());
+    EXPECT_EQ(string("2015-01-27T09:00:01"), dataset.CreatedAt());
+    EXPECT_EQ(string("PacBio.DataSet.AlignmentSet"), dataset.MetaType());
+    EXPECT_EQ(string("DataSet_AlignmentSet"), dataset.Name());
+    EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
+    EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
+    EXPECT_EQ(string("2.3.0"), dataset.Version());
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
+
+    EXPECT_EQ(0, dataset.Filters().Size());
+
+    // external resources (exactly two, enforced by the fatal size check)
+    const ExternalResources& resources = dataset.ExternalResources();
+    ASSERT_EQ(2, resources.Size());
+    for (size_t i = 0; i < 2; ++i) {
+        const ExternalResource& resource = resources[i];
+        const ExpectedResource& want = expectedResources[i];
+        EXPECT_EQ(string(want.name), resource.Name());
+        EXPECT_EQ(string(want.description), resource.Description());
+        EXPECT_EQ(string("AlignmentFile.AlignmentBamFile"), resource.MetaType());
+        EXPECT_EQ(string(want.resourceId), resource.ResourceId());
+        EXPECT_EQ(string("Example"), resource.Tags());
+
+        const FileIndices& fileIndices = resource.FileIndices();
+        ASSERT_EQ(1, fileIndices.Size());
+        EXPECT_EQ(string(want.indexId), fileIndices[0].ResourceId());
+    }
+
+    // sub-datasets (exactly two), each with one single-property filter
+    const SubDataSets& subdatasets = dataset.SubDataSets();
+    ASSERT_EQ(2, subdatasets.Size());
+    for (size_t i = 0; i < 2; ++i) {
+        const DataSetBase& subdataset = subdatasets[i];
+        const ExpectedSubDataSet& want = expectedSubDataSets[i];
+        EXPECT_EQ(string(""), subdataset.CreatedAt());
+        EXPECT_EQ(string(""), subdataset.MetaType());
+        EXPECT_EQ(string(want.name), subdataset.Name());
+        EXPECT_EQ(string(""), subdataset.Tags());
+        EXPECT_EQ(string(want.uniqueId), subdataset.UniqueId());
+        EXPECT_EQ(string("2.3.0"), subdataset.Version());
+
+        const Filters& filters = subdataset.Filters();
+        ASSERT_EQ(1, filters.Size());
+        const Properties& properties = filters[0].Properties();
+        ASSERT_EQ(1, properties.Size());
+        const Property& property = properties[0];
+        EXPECT_EQ(string(want.propertyName), property.Name());
+        EXPECT_EQ(string(want.propertyValue), property.Value());
+        EXPECT_EQ(string(want.propertyOperator), property.Operator());
+    }
+}
+
+// Loads ali2.xml and checks the root attributes, both external resources
+// (each with one file index) and both filtered sub-datasets.
+static void TestAli2Xml(void)
+{
+    // Per-resource expectations; MetaType and Tags are the same for both.
+    struct ExpectedResource
+    {
+        const char* name;
+        const char* description;
+        const char* resourceId;
+        const char* indexId;
+    };
+    // Per-sub-dataset expectations, including its single filter property.
+    struct ExpectedSubDataSet
+    {
+        const char* name;
+        const char* uniqueId;
+        const char* propertyName;
+        const char* propertyValue;
+        const char* propertyOperator;
+    };
+
+    const ExpectedResource expectedResources[2] = {
+        { "First Alignments BAM",
+          "Points to an example Alignments BAM file.",
+          "file:///mnt/path/to/alignments2.bam",
+          "file:///mnt/path/to/alignments2.pbi" },
+        { "Second Alignments BAM",
+          "Points to another example Alignments BAM file, by relative path.",
+          "file:./alignments3.bam",
+          "file:///mnt/path/to/alignments3.pbi" }
+    };
+    const ExpectedSubDataSet expectedSubDataSets[2] = {
+        { "HighQuality Read Alignments", "ab95d0a3-94b8-4918-b3af-a3f81bbe519c", "rq", "0.85", ">" },
+        { "Alignments to chromosome 1", "ac95d0a3-94b8-4918-b3af-a3f81bbe519c", "RNAME", "chr1", "==" }
+    };
+
+    const DataSet dataset(ali2XmlFn);
+
+    // root element
+    EXPECT_EQ(DataSet::ALIGNMENT, dataset.Type());
+    EXPECT_EQ(string("2015-01-27T09:00:01"), dataset.CreatedAt());
+    EXPECT_EQ(string("PacBio.DataSet.AlignmentSet"), dataset.MetaType());
+    EXPECT_EQ(string("DataSet_AlignmentSet"), dataset.Name());
+    EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
+    EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
+    EXPECT_EQ(string("2.3.0"), dataset.Version());
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
+
+    EXPECT_EQ(0, dataset.Filters().Size());
+
+    // external resources (exactly two, enforced by the fatal size check)
+    const ExternalResources& resources = dataset.ExternalResources();
+    ASSERT_EQ(2, resources.Size());
+    for (size_t i = 0; i < 2; ++i) {
+        const ExternalResource& resource = resources[i];
+        const ExpectedResource& want = expectedResources[i];
+        EXPECT_EQ(string(want.name), resource.Name());
+        EXPECT_EQ(string(want.description), resource.Description());
+        EXPECT_EQ(string("AlignmentFile.AlignmentBamFile"), resource.MetaType());
+        EXPECT_EQ(string(want.resourceId), resource.ResourceId());
+        EXPECT_EQ(string("Example"), resource.Tags());
+
+        const FileIndices& fileIndices = resource.FileIndices();
+        ASSERT_EQ(1, fileIndices.Size());
+        EXPECT_EQ(string(want.indexId), fileIndices[0].ResourceId());
+    }
+
+    // sub-datasets (exactly two), each with one single-property filter
+    const SubDataSets& subdatasets = dataset.SubDataSets();
+    ASSERT_EQ(2, subdatasets.Size());
+    for (size_t i = 0; i < 2; ++i) {
+        const DataSetBase& subdataset = subdatasets[i];
+        const ExpectedSubDataSet& want = expectedSubDataSets[i];
+        EXPECT_EQ(string(""), subdataset.CreatedAt());
+        EXPECT_EQ(string(""), subdataset.MetaType());
+        EXPECT_EQ(string(want.name), subdataset.Name());
+        EXPECT_EQ(string(""), subdataset.Tags());
+        EXPECT_EQ(string(want.uniqueId), subdataset.UniqueId());
+        EXPECT_EQ(string("2.3.0"), subdataset.Version());
+
+        const Filters& filters = subdataset.Filters();
+        ASSERT_EQ(1, filters.Size());
+        const Properties& properties = filters[0].Properties();
+        ASSERT_EQ(1, properties.Size());
+        const Property& property = properties[0];
+        EXPECT_EQ(string(want.propertyName), property.Name());
+        EXPECT_EQ(string(want.propertyValue), property.Value());
+        EXPECT_EQ(string(want.propertyOperator), property.Operator());
+    }
+}
+
+// Loads ali3.xml and checks the root attributes, both external resources
+// (each with one file index) and both filtered sub-datasets.
+static void TestAli3Xml(void)
+{
+    // Per-resource expectations; MetaType and Tags are the same for both.
+    struct ExpectedResource
+    {
+        const char* name;
+        const char* description;
+        const char* resourceId;
+        const char* indexId;
+    };
+    // Per-sub-dataset expectations, including its single filter property.
+    struct ExpectedSubDataSet
+    {
+        const char* name;
+        const char* uniqueId;
+        const char* propertyName;
+        const char* propertyValue;
+        const char* propertyOperator;
+    };
+
+    const ExpectedResource expectedResources[2] = {
+        { "First Alignments BAM",
+          "Points to an example Alignments BAM file.",
+          "file:///mnt/path/to/alignments2.bam",
+          "file:///mnt/path/to/alignments2.pbi" },
+        { "Second Alignments BAM",
+          "Points to another example Alignments BAM file, by relative path.",
+          "file:./alignments3.bam",
+          "file:///mnt/path/to/alignments3.pbi" }
+    };
+    const ExpectedSubDataSet expectedSubDataSets[2] = {
+        { "HighQuality Read Alignments", "ab95d0a3-94b8-4918-b3af-a3f81bbe519c", "rq", "0.75", ">" },
+        { "Alignments to chromosome 1", "ac95d0a3-94b8-4918-b3af-a3f81bbe519c", "RNAME", "chr1", "==" }
+    };
+
+    const DataSet dataset(ali3XmlFn);
+
+    // root element
+    EXPECT_EQ(DataSet::ALIGNMENT, dataset.Type());
+    EXPECT_EQ(string("2015-01-27T09:00:01"), dataset.CreatedAt());
+    EXPECT_EQ(string("PacBio.DataSet.AlignmentSet"), dataset.MetaType());
+    EXPECT_EQ(string("DataSet_AlignmentSet"), dataset.Name());
+    EXPECT_EQ(string("barcode moreTags mapping mytags"), dataset.Tags());
+    EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), dataset.UniqueId());
+    EXPECT_EQ(string("2.3.0"), dataset.Version());
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), dataset.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), dataset.Attribute("xsi:schemaLocation"));
+
+    EXPECT_EQ(0, dataset.Filters().Size());
+
+    // external resources (exactly two, enforced by the fatal size check)
+    const ExternalResources& resources = dataset.ExternalResources();
+    ASSERT_EQ(2, resources.Size());
+    for (size_t i = 0; i < 2; ++i) {
+        const ExternalResource& resource = resources[i];
+        const ExpectedResource& want = expectedResources[i];
+        EXPECT_EQ(string(want.name), resource.Name());
+        EXPECT_EQ(string(want.description), resource.Description());
+        EXPECT_EQ(string("AlignmentFile.AlignmentBamFile"), resource.MetaType());
+        EXPECT_EQ(string(want.resourceId), resource.ResourceId());
+        EXPECT_EQ(string("Example"), resource.Tags());
+
+        const FileIndices& fileIndices = resource.FileIndices();
+        ASSERT_EQ(1, fileIndices.Size());
+        EXPECT_EQ(string(want.indexId), fileIndices[0].ResourceId());
+    }
+
+    // sub-datasets (exactly two), each with one single-property filter
+    const SubDataSets& subdatasets = dataset.SubDataSets();
+    ASSERT_EQ(2, subdatasets.Size());
+    for (size_t i = 0; i < 2; ++i) {
+        const DataSetBase& subdataset = subdatasets[i];
+        const ExpectedSubDataSet& want = expectedSubDataSets[i];
+        EXPECT_EQ(string(""), subdataset.CreatedAt());
+        EXPECT_EQ(string(""), subdataset.MetaType());
+        EXPECT_EQ(string(want.name), subdataset.Name());
+        EXPECT_EQ(string(""), subdataset.Tags());
+        EXPECT_EQ(string(want.uniqueId), subdataset.UniqueId());
+        EXPECT_EQ(string("2.3.0"), subdataset.Version());
+
+        const Filters& filters = subdataset.Filters();
+        ASSERT_EQ(1, filters.Size());
+        const Properties& properties = filters[0].Properties();
+        ASSERT_EQ(1, properties.Size());
+        const Property& property = properties[0];
+        EXPECT_EQ(string(want.propertyName), property.Name());
+        EXPECT_EQ(string(want.propertyValue), property.Value());
+        EXPECT_EQ(string(want.propertyOperator), property.Operator());
+    }
+}
+
+// Loads ali4XmlFn and checks the parsed alignment dataset: root attributes,
+// the two external resources (one file index each), and the two
+// sub-datasets (one single-property filter each).
+static void TestAli4Xml(void)
+{
+    const DataSet ds(ali4XmlFn);
+
+    // root attributes
+    EXPECT_EQ(DataSet::ALIGNMENT, ds.Type());
+    EXPECT_EQ(string("2015-01-27T09:00:01"), ds.CreatedAt());
+    EXPECT_EQ(string("PacBio.DataSet.AlignmentSet"), ds.MetaType());
+    EXPECT_EQ(string("DataSet_AlignmentSet"), ds.Name());
+    EXPECT_EQ(string("barcode moreTags mapping mytags"), ds.Tags());
+    EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), ds.UniqueId());
+    EXPECT_EQ(string("2.3.0"), ds.Version());
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), ds.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xsi:schemaLocation"));
+
+    // the top-level dataset carries no filters
+    EXPECT_EQ(0, ds.Filters().Size());
+
+    // exactly two external resources; a wrong count aborts the helper
+    const ExternalResources& extResources = ds.ExternalResources();
+    ASSERT_EQ(2, extResources.Size());
+
+    {   // external resource #0
+        const ExternalResource& ext = extResources[0];
+        EXPECT_EQ(string("First Alignments BAM"), ext.Name());
+        EXPECT_EQ(string("Points to an example Alignments BAM file."), ext.Description());
+        EXPECT_EQ(string("AlignmentFile.AlignmentBamFile"), ext.MetaType());
+        EXPECT_EQ(string("file:///mnt/path/to/alignments0.bam"), ext.ResourceId());
+        EXPECT_EQ(string("Example"), ext.Tags());
+
+        const FileIndices& indices = ext.FileIndices();
+        ASSERT_EQ(1, indices.Size());
+        const FileIndex& idx = indices[0];
+        EXPECT_EQ(string("file:///mnt/path/to/alignments0.pbi"), idx.ResourceId());
+    }
+
+    {   // external resource #1
+        const ExternalResource& ext = extResources[1];
+        EXPECT_EQ(string("Second Alignments BAM"), ext.Name());
+        EXPECT_EQ(string("Points to another example Alignments BAM file, by relative path."), ext.Description());
+        EXPECT_EQ(string("AlignmentFile.AlignmentBamFile"), ext.MetaType());
+        EXPECT_EQ(string("file:./alignments1.bam"), ext.ResourceId());
+        EXPECT_EQ(string("Example"), ext.Tags());
+
+        const FileIndices& indices = ext.FileIndices();
+        ASSERT_EQ(1, indices.Size());
+        const FileIndex& idx = indices[0];
+        EXPECT_EQ(string("file:///mnt/path/to/alignments1.pbi"), idx.ResourceId());
+    }
+
+    // exactly two sub-datasets, each with one filter holding one property
+    const SubDataSets& subs = ds.SubDataSets();
+    ASSERT_EQ(2, subs.Size());
+
+    {   // sub-dataset #0
+        const DataSetBase& sub = subs[0];
+        EXPECT_EQ(string(""), sub.CreatedAt());
+        EXPECT_EQ(string(""), sub.MetaType());
+        EXPECT_EQ(string("HighQuality Read Alignments"), sub.Name());
+        EXPECT_EQ(string(""), sub.Tags());
+        EXPECT_EQ(string("ab95d0a3-94b8-4918-b3af-a3f81bbe519c"), sub.UniqueId());
+        EXPECT_EQ(string("2.3.0"), sub.Version());
+
+        const Filters& subFilters = sub.Filters();
+        ASSERT_EQ(1, subFilters.Size());
+        const Filter& rule = subFilters[0];
+        const Properties& props = rule.Properties();
+        ASSERT_EQ(1, props.Size());
+        const Property& prop = props[0];
+        EXPECT_EQ(string("rq"), prop.Name());
+        EXPECT_EQ(string("0.85"), prop.Value());
+        EXPECT_EQ(string(">"), prop.Operator());
+    }
+
+    {   // sub-dataset #1
+        const DataSetBase& sub = subs[1];
+        EXPECT_EQ(string(""), sub.CreatedAt());
+        EXPECT_EQ(string(""), sub.MetaType());
+        EXPECT_EQ(string("Alignments to chromosome 1"), sub.Name());
+        EXPECT_EQ(string(""), sub.Tags());
+        EXPECT_EQ(string("ac95d0a3-94b8-4918-b3af-a3f81bbe519c"), sub.UniqueId());
+        EXPECT_EQ(string("2.3.0"), sub.Version());
+
+        const Filters& subFilters = sub.Filters();
+        ASSERT_EQ(1, subFilters.Size());
+        const Filter& rule = subFilters[0];
+        const Properties& props = rule.Properties();
+        ASSERT_EQ(1, props.Size());
+        const Property& prop = props[0];
+        EXPECT_EQ(string("RNAME"), prop.Name());
+        EXPECT_EQ(string("chr1"), prop.Value());
+        EXPECT_EQ(string("=="), prop.Operator());
+    }
+}
+
+// Loads mappingStaggeredXmlFn and checks the parsed generic dataset: root
+// attributes, its two external BAM resources (one .bai index each), and its
+// two sub-datasets, each of which lists a single external resource.
+static void TestMappingStaggeredXml(void)
+{
+    const DataSet ds(mappingStaggeredXmlFn);
+
+    // root attributes
+    EXPECT_EQ(DataSet::GENERIC, ds.Type());
+    EXPECT_EQ(string("2015-05-13T10:58:26"), ds.CreatedAt());
+    EXPECT_EQ(string("PacBio.DataSet.DataSet"), ds.MetaType());
+    EXPECT_EQ(string(""), ds.Name());
+    EXPECT_EQ(string(""), ds.Tags());
+    EXPECT_EQ(string("30f72098-bc5b-e06b-566c-8b28dda909a8"), ds.UniqueId());
+    EXPECT_EQ(string("2.3.0"), ds.Version());
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), ds.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xsi:schemaLocation"));
+
+    // the top-level dataset carries no filters
+    EXPECT_EQ(0, ds.Filters().Size());
+
+    // exactly two top-level external resources; a wrong count aborts the helper
+    const ExternalResources& extResources = ds.ExternalResources();
+    ASSERT_EQ(2, extResources.Size());
+
+    {   // external resource #0
+        const ExternalResource& ext = extResources[0];
+        EXPECT_EQ(string(""), ext.Name());
+        EXPECT_EQ(string(""), ext.Description());
+        EXPECT_EQ(string(""), ext.MetaType());
+        EXPECT_EQ(string("file:tests/data/bam_mapping_1.bam"), ext.ResourceId());
+        EXPECT_EQ(string(""), ext.Tags());
+
+        const FileIndices& indices = ext.FileIndices();
+        ASSERT_EQ(1, indices.Size());
+        const FileIndex& idx = indices[0];
+        EXPECT_EQ(string("file:tests/data/bam_mapping_1.bam.bai"), idx.ResourceId());
+    }
+
+    {   // external resource #1
+        const ExternalResource& ext = extResources[1];
+        EXPECT_EQ(string(""), ext.Name());
+        EXPECT_EQ(string(""), ext.Description());
+        EXPECT_EQ(string(""), ext.MetaType());
+        EXPECT_EQ(string("file:tests/data/bam_mapping_2.bam"), ext.ResourceId());
+        EXPECT_EQ(string(""), ext.Tags());
+
+        const FileIndices& indices = ext.FileIndices();
+        ASSERT_EQ(1, indices.Size());
+        const FileIndex& idx = indices[0];
+        EXPECT_EQ(string("file:tests/data/bam_mapping_2.bam.bai"), idx.ResourceId());
+    }
+
+    // exactly two sub-datasets; note they reference the BAM files in the
+    // opposite order from the top-level list (#0 -> mapping_2, #1 -> mapping_1)
+    const SubDataSets& subs = ds.SubDataSets();
+    ASSERT_EQ(2, subs.Size());
+
+    {   // sub-dataset #0
+        const DataSetBase& sub = subs[0];
+        EXPECT_EQ(string("2015-05-13T10:58:26"), sub.CreatedAt());
+        EXPECT_EQ(string(""), sub.MetaType());
+        EXPECT_EQ(string(""), sub.Name());
+        EXPECT_EQ(string(""), sub.Tags());
+        EXPECT_EQ(string("c5402d06-4643-057c-e300-fe229b4e8909"), sub.UniqueId());
+        EXPECT_EQ(string("2.3.0"), sub.Version());
+
+        const ExternalResources& subResources = sub.ExternalResources();
+        ASSERT_EQ(1, subResources.Size());
+        const ExternalResource& subExt = subResources[0];
+        EXPECT_EQ(string("file:tests/data/bam_mapping_2.bam"), subExt.ResourceId());
+        const FileIndices& indices = subExt.FileIndices();
+        ASSERT_EQ(1, indices.Size());
+        const FileIndex& idx = indices[0];
+        EXPECT_EQ(string("file:tests/data/bam_mapping_2.bam.bai"), idx.ResourceId());
+    }
+
+    {   // sub-dataset #1
+        const DataSetBase& sub = subs[1];
+        EXPECT_EQ(string("2015-05-13T10:58:26"), sub.CreatedAt());
+        EXPECT_EQ(string(""), sub.MetaType());
+        EXPECT_EQ(string(""), sub.Name());
+        EXPECT_EQ(string(""), sub.Tags());
+        EXPECT_EQ(string("f8b54a55-5fb7-706f-ab35-39afc9c86924"), sub.UniqueId());
+        EXPECT_EQ(string("2.3.0"), sub.Version());
+
+        const ExternalResources& subResources = sub.ExternalResources();
+        ASSERT_EQ(1, subResources.Size());
+        const ExternalResource& subExt = subResources[0];
+        EXPECT_EQ(string("file:tests/data/bam_mapping_1.bam"), subExt.ResourceId());
+        const FileIndices& indices = subExt.FileIndices();
+        ASSERT_EQ(1, indices.Size());
+        const FileIndex& idx = indices[0];
+        EXPECT_EQ(string("file:tests/data/bam_mapping_1.bam.bai"), idx.ResourceId());
+    }
+}
+
+// Loads barcodeXmlFn and checks the parsed barcode dataset: root attributes,
+// absence of filters and sub-datasets, its single FASTA external resource,
+// and the dataset metadata (including the BarcodeConstruction child text).
+static void TestBarcodeXml(void)
+{
+    const DataSet ds(barcodeXmlFn);
+
+    // root attributes
+    EXPECT_EQ(DataSet::BARCODE, ds.Type());
+    EXPECT_EQ(string("2015-01-27T09:00:01"), ds.CreatedAt());
+    EXPECT_EQ(string("PacBio.DataSet.BarcodeSet"), ds.MetaType());
+    EXPECT_EQ(string("DataSet_BarcodeSet"), ds.Name());
+    EXPECT_EQ(string("barcode moreTags mapping mytags"), ds.Tags());
+    EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), ds.UniqueId());
+    EXPECT_EQ(string("2.3.0"), ds.Version());
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), ds.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xsi:schemaLocation"));
+
+    // neither filters nor sub-datasets are expected
+    EXPECT_EQ(0, ds.Filters().Size());
+    EXPECT_EQ(0, ds.SubDataSets().Size());
+
+    // a single external resource; a wrong count aborts the helper
+    const ExternalResources& extResources = ds.ExternalResources();
+    ASSERT_EQ(1, extResources.Size());
+    const ExternalResource& fasta = extResources[0];
+    EXPECT_EQ(string("First Barcodes FASTA"), fasta.Name());
+    EXPECT_EQ(string("Points to an example Barcodes FASTA file."), fasta.Description());
+    EXPECT_EQ(string("BarcodeFile.BarcodeFastaFile"), fasta.MetaType());
+    EXPECT_EQ(string("file:///mnt/path/to/barcode.fasta"), fasta.ResourceId());
+    EXPECT_EQ(string("Example"), fasta.Tags());
+
+    // record count and total length are compared as strings
+    const DataSetMetadata& meta = ds.Metadata();
+    EXPECT_EQ(string("30"), meta.NumRecords());
+    EXPECT_EQ(string("400"), meta.TotalLength());
+
+    // access metadata extensions directly for now
+    EXPECT_EQ(string("paired"), meta.ChildText("BarcodeConstruction"));
+}
+
+// Loads ccsReadXmlFn and checks the parsed consensus-read dataset: root
+// attributes, absence of filters and sub-datasets, and its two external BAM
+// resources, each with a single file index.
+static void TestCcsReadXml(void)
+{
+    const DataSet ds(ccsReadXmlFn);
+
+    // root attributes
+    EXPECT_EQ(DataSet::CONSENSUS_READ, ds.Type());
+    EXPECT_EQ(string("2015-01-27T09:00:01"), ds.CreatedAt());
+    EXPECT_EQ(string("PacBio.DataSet.ConsensusReadSet"), ds.MetaType());
+    EXPECT_EQ(string("DataSet_ConsensusReadSet"), ds.Name());
+    EXPECT_EQ(string("barcode moreTags mapping mytags"), ds.Tags());
+    EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), ds.UniqueId());
+    EXPECT_EQ(string("2.3.0"), ds.Version());
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), ds.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xsi:schemaLocation"));
+
+    // neither filters nor sub-datasets are expected
+    EXPECT_EQ(0, ds.Filters().Size());
+    EXPECT_EQ(0, ds.SubDataSets().Size());
+
+    // exactly two external resources; a wrong count aborts the helper
+    const ExternalResources& extResources = ds.ExternalResources();
+    ASSERT_EQ(2, extResources.Size());
+
+    {   // external resource #0
+        const ExternalResource& ext = extResources[0];
+        EXPECT_EQ(string("First ConsensusRead BAM"), ext.Name());
+        EXPECT_EQ(string("Points to an example ConsensusRead BAM file."), ext.Description());
+        EXPECT_EQ(string("PacBio.ConsensusReadFile.ConsensusReadBamFile"), ext.MetaType());
+        EXPECT_EQ(string("file:///mnt/path/to/ccsreads0.bam"), ext.ResourceId());
+        EXPECT_EQ(string("Example"), ext.Tags());
+
+        const FileIndices& indices = ext.FileIndices();
+        ASSERT_EQ(1, indices.Size());
+        const FileIndex& idx = indices[0];
+        EXPECT_EQ(string("PacBio.Index.PacBioIndex"), idx.MetaType());
+        EXPECT_EQ(string("file:///mnt/path/to/ccsreads0.pbi"), idx.ResourceId());
+    }
+
+    {   // external resource #1
+        const ExternalResource& ext = extResources[1];
+        EXPECT_EQ(string("Second ConsensusRead BAM"), ext.Name());
+        EXPECT_EQ(string("Points to another example ConsensusRead BAM file."), ext.Description());
+        EXPECT_EQ(string("PacBio.ConsensusReadFile.ConsensusReadBamFile"), ext.MetaType());
+        EXPECT_EQ(string("file:///mnt/path/to/ccsreads1.bam"), ext.ResourceId());
+        EXPECT_EQ(string("Example"), ext.Tags());
+
+        const FileIndices& indices = ext.FileIndices();
+        ASSERT_EQ(1, indices.Size());
+        const FileIndex& idx = indices[0];
+        EXPECT_EQ(string("PacBio.Index.PacBioIndex"), idx.MetaType());
+        // NOTE(review): the expected index id is ccsreads0.pbi although the BAM
+        // is ccsreads1.bam; presumably this mirrors the fixture XML - confirm.
+        EXPECT_EQ(string("file:///mnt/path/to/ccsreads0.pbi"), idx.ResourceId());
+    }
+}
+
+// Loads lambdaContigsXmlFn and checks the parsed reference dataset: root
+// attributes, absence of filters and sub-datasets, and the resource id of
+// its single external resource.
+static void TestLambdaContigsXml(void)
+{
+    const DataSet ds(lambdaContigsXmlFn);
+
+    // root attributes
+    EXPECT_EQ(DataSet::REFERENCE, ds.Type());
+    EXPECT_EQ(string("2015-05-28T10:56:36"), ds.CreatedAt());
+    EXPECT_EQ(string("PacBio.DataSet.ReferenceSet"), ds.MetaType());
+    EXPECT_EQ(string(""), ds.Name());
+    EXPECT_EQ(string(""), ds.Tags());
+    EXPECT_EQ(string("596e87db-34f9-d2fd-c905-b017543170e1"), ds.UniqueId());
+    EXPECT_EQ(string("2.3.0"), ds.Version());
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), ds.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xsi:schemaLocation"));
+
+    // neither filters nor sub-datasets are expected
+    EXPECT_EQ(0, ds.Filters().Size());
+    EXPECT_EQ(0, ds.SubDataSets().Size());
+
+    // a single external resource; a wrong count aborts the helper
+    const ExternalResources& extResources = ds.ExternalResources();
+    ASSERT_EQ(1, extResources.Size());
+    const ExternalResource& fasta = extResources[0];
+    EXPECT_EQ(string("file:tests/data/lambda_contigs.fasta"), fasta.ResourceId());
+}
+
+// Loads pbalchemyXmlFn and checks the parsed generic dataset: root
+// attributes, absence of sub-datasets, and its single external BAM resource
+// with one .bai file index. Filters are not inspected here.
+static void TestPbalchemyXml(void)
+{
+    const DataSet ds(pbalchemyXmlFn);
+
+    // root attributes
+    EXPECT_EQ(DataSet::GENERIC, ds.Type());
+    EXPECT_EQ(string("2015-05-22T16:56:16"), ds.CreatedAt());
+    EXPECT_EQ(string("PacBio.DataSet.DataSet"), ds.MetaType());
+    EXPECT_EQ(string(""), ds.Name());
+    EXPECT_EQ(string(""), ds.Tags());
+    EXPECT_EQ(string("58e3f7c5-24c1-b58b-fbd5-37de268cc2f0"), ds.UniqueId());
+    EXPECT_EQ(string("2.3.0"), ds.Version());
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), ds.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xsi:schemaLocation"));
+
+    // no sub-datasets are expected
+    EXPECT_EQ(0, ds.SubDataSets().Size());
+
+    // a single external resource with a single index; wrong counts abort the helper
+    const ExternalResources& extResources = ds.ExternalResources();
+    ASSERT_EQ(1, extResources.Size());
+    const ExternalResource& bam = extResources[0];
+    EXPECT_EQ(string("file:tests/data/pbalchemy10kbp.pbalign.sorted.pbver1.bam"), bam.ResourceId());
+    const FileIndices& indices = bam.FileIndices();
+    ASSERT_EQ(1, indices.Size());
+    const FileIndex& bai = indices[0];
+    EXPECT_EQ(string("file:tests/data/pbalchemy10kbp.pbalign.sorted.pbver1.bam.bai"), bai.ResourceId());
+
+    // TYPOs: Should be Filter Properties/Property not Parameter(s)
+    // (presumably a remark about the fixture XML, and why no filter checks
+    // follow - TODO confirm)
+
+}
+
+// Loads referenceXmlFn and checks the parsed reference dataset: root
+// attributes, absence of filters and sub-datasets, its single FASTA external
+// resource with two file indices, and the dataset metadata including the
+// Organism/Ploidy child text and the single entry under "Contigs".
+static void TestReferenceXml(void)
+{
+    const DataSet ds(referenceXmlFn);
+
+    // root attributes
+    EXPECT_EQ(DataSet::REFERENCE, ds.Type());
+    EXPECT_EQ(string("2015-01-27T09:00:01"), ds.CreatedAt());
+    EXPECT_EQ(string("PacBio.DataSet.ReferenceSet"), ds.MetaType());
+    EXPECT_EQ(string("DataSet_ReferenceSet"), ds.Name());
+    EXPECT_EQ(string("barcode moreTags mapping mytags"), ds.Tags());
+    EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), ds.UniqueId());
+    EXPECT_EQ(string("2.3.0"), ds.Version());
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), ds.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xsi:schemaLocation"));
+
+    // neither filters nor sub-datasets are expected
+    EXPECT_EQ(0, ds.Filters().Size());
+    EXPECT_EQ(0, ds.SubDataSets().Size());
+
+    // a single external resource; a wrong count aborts the helper
+    const ExternalResources& extResources = ds.ExternalResources();
+    ASSERT_EQ(1, extResources.Size());
+    const ExternalResource& fasta = extResources[0];
+    EXPECT_EQ(string("First References FASTA"), fasta.Name());
+    EXPECT_EQ(string("Points to an example references FASTA file."), fasta.Description());
+    EXPECT_EQ(string("PacBio.ReferenceFile.ReferenceFastaFile"), fasta.MetaType());
+    EXPECT_EQ(string("file:///mnt/path/to/reference.fasta"), fasta.ResourceId());
+    EXPECT_EQ(string("Example"), fasta.Tags());
+
+    // the resource carries exactly two file indices
+    const FileIndices& indices = fasta.FileIndices();
+    ASSERT_EQ(2, indices.Size());
+    {   // file index #0
+        const FileIndex& idx = indices[0];
+        EXPECT_EQ(string("PacBio.Index.SaWriterIndex"), idx.MetaType());
+        EXPECT_EQ(string("file:///mnt/path/to/reference.fasta.sa"), idx.ResourceId());
+    }
+    {   // file index #1
+        const FileIndex& idx = indices[1];
+        EXPECT_EQ(string("PacBio.Index.SamIndex"), idx.MetaType());
+        EXPECT_EQ(string("file:///mnt/path/to/reference.fasta.fai"), idx.ResourceId());
+    }
+
+    // record count and total length are compared as strings
+    const DataSetMetadata& meta = ds.Metadata();
+    EXPECT_EQ(string("500"), meta.NumRecords());
+    EXPECT_EQ(string("5000000"), meta.TotalLength());
+
+    // access metadata extensions directly for now
+    EXPECT_EQ(string("Tribble"), meta.ChildText("Organism"));
+    EXPECT_EQ(string("Diploid"), meta.ChildText("Ploidy"));
+
+    // the "Contigs" child is read as a generic list of generic elements
+    typedef internal::DataSetListElement<internal::DataSetElement> ContigList;
+    const ContigList& contigList = meta.Child<ContigList>("Contigs");
+    ASSERT_EQ(1, contigList.NumChildren());
+    const internal::DataSetElement& entry = contigList[0];
+    EXPECT_EQ(string("gi|229359445|emb|AM181176.4|"), entry.Attribute("Name"));
+    EXPECT_EQ(string("Pseudomonas fluorescens SBW25 complete genome|quiver"), entry.Attribute("Description"));
+    EXPECT_EQ(string("6722109"), entry.Attribute("Length"));
+    EXPECT_EQ(string("f627c795efad7ce0050ed42b942d408e"), entry.Attribute("Digest"));
+}
+
+// Loads subread1XmlFn and checks the parsed subread dataset: root attributes,
+// its two external BAM resources (one file index each), its two
+// single-property filters, and the dataset metadata counts.
+static void TestSubread1Xml(void)
+{
+    const DataSet ds(subread1XmlFn);
+
+    // root attributes
+    EXPECT_EQ(DataSet::SUBREAD, ds.Type());
+    EXPECT_EQ(string("2015-01-27T09:00:01"), ds.CreatedAt());
+    EXPECT_EQ(string("PacBio.DataSet.SubreadSet"), ds.MetaType());
+    EXPECT_EQ(string("DataSet_SubreadSet"), ds.Name());
+    EXPECT_EQ(string("barcode moreTags mapping mytags"), ds.Tags());
+    EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), ds.UniqueId());
+    EXPECT_EQ(string("2.3.0"), ds.Version());
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), ds.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xsi:schemaLocation"));
+
+    // exactly two external resources; a wrong count aborts the helper
+    const ExternalResources& extResources = ds.ExternalResources();
+    ASSERT_EQ(2, extResources.Size());
+
+    {   // external resource #0
+        const ExternalResource& ext = extResources[0];
+        EXPECT_EQ(string("First Subreads BAM"), ext.Name());
+        EXPECT_EQ(string("Points to an example Subreads BAM file."), ext.Description());
+        EXPECT_EQ(string("SubreadFile.SubreadBamFile"), ext.MetaType());
+        EXPECT_EQ(string("file:///mnt/path/to/subreads0.bam"), ext.ResourceId());
+        EXPECT_EQ(string("Example"), ext.Tags());
+
+        const FileIndices& indices = ext.FileIndices();
+        ASSERT_EQ(1, indices.Size());
+        const FileIndex& idx = indices[0];
+        EXPECT_EQ(string("file:///mnt/path/to/subreads0.pbi"), idx.ResourceId());
+    }
+
+    {   // external resource #1
+        const ExternalResource& ext = extResources[1];
+        EXPECT_EQ(string("Second Subreads BAM"), ext.Name());
+        EXPECT_EQ(string("Points to another example Subreads BAM file."), ext.Description());
+        EXPECT_EQ(string("SubreadFile.SubreadBamFile"), ext.MetaType());
+        EXPECT_EQ(string("file:///mnt/path/to/subreads1.bam"), ext.ResourceId());
+        EXPECT_EQ(string("Example"), ext.Tags());
+
+        const FileIndices& indices = ext.FileIndices();
+        ASSERT_EQ(1, indices.Size());
+        const FileIndex& idx = indices[0];
+        // NOTE(review): the expected index id is subreads0.pbi although the BAM
+        // is subreads1.bam; presumably this mirrors the fixture XML - confirm.
+        EXPECT_EQ(string("file:///mnt/path/to/subreads0.pbi"), idx.ResourceId());
+    }
+
+    // exactly two filters, each holding one property
+    const Filters& dsFilters = ds.Filters();
+    ASSERT_EQ(2, dsFilters.Size());
+
+    {   // filter #0
+        const Filter& rule = dsFilters[0];
+        const Properties& props = rule.Properties();
+        ASSERT_EQ(1, props.Size());
+        const Property& prop = props[0];
+        EXPECT_EQ(string("rq"), prop.Name());
+        EXPECT_EQ(string("0.75"), prop.Value());
+        EXPECT_EQ(string(">"), prop.Operator());
+    }
+
+    {   // filter #1
+        const Filter& rule = dsFilters[1];
+        const Properties& props = rule.Properties();
+        ASSERT_EQ(1, props.Size());
+        const Property& prop = props[0];
+        EXPECT_EQ(string("QNAME"), prop.Name());
+        EXPECT_EQ(string("100/0/0_100"), prop.Value());
+        EXPECT_EQ(string("=="), prop.Operator());
+    }
+
+    // record count and total length are compared as strings
+    const DataSetMetadata& meta = ds.Metadata();
+    EXPECT_EQ(string("500"), meta.NumRecords());
+    EXPECT_EQ(string("500000"), meta.TotalLength());
+}
+
+// Loads subread2XmlFn and checks the parsed subread dataset: root attributes,
+// its two external BAM resources (one file index each), its two
+// single-property filters, and the dataset metadata counts.
+static void TestSubread2Xml(void)
+{
+    const DataSet ds(subread2XmlFn);
+
+    // root attributes
+    EXPECT_EQ(DataSet::SUBREAD, ds.Type());
+    EXPECT_EQ(string("2015-01-27T09:00:01"), ds.CreatedAt());
+    EXPECT_EQ(string("PacBio.DataSet.SubreadSet"), ds.MetaType());
+    EXPECT_EQ(string("DataSet_SubreadSet"), ds.Name());
+    EXPECT_EQ(string("barcode moreTags mapping mytags"), ds.Tags());
+    EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), ds.UniqueId());
+    EXPECT_EQ(string("2.3.0"), ds.Version());
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), ds.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xsi:schemaLocation"));
+
+    // exactly two external resources; a wrong count aborts the helper
+    const ExternalResources& extResources = ds.ExternalResources();
+    ASSERT_EQ(2, extResources.Size());
+
+    {   // external resource #0
+        const ExternalResource& ext = extResources[0];
+        EXPECT_EQ(string("First Subreads BAM"), ext.Name());
+        EXPECT_EQ(string("Points to an example Subreads BAM file."), ext.Description());
+        EXPECT_EQ(string("SubreadFile.SubreadBamFile"), ext.MetaType());
+        EXPECT_EQ(string("file:///mnt/path/to/subreads2.bam"), ext.ResourceId());
+        EXPECT_EQ(string("Example"), ext.Tags());
+
+        const FileIndices& indices = ext.FileIndices();
+        ASSERT_EQ(1, indices.Size());
+        const FileIndex& idx = indices[0];
+        EXPECT_EQ(string("file:///mnt/path/to/subreads2.pbi"), idx.ResourceId());
+    }
+
+    {   // external resource #1
+        const ExternalResource& ext = extResources[1];
+        EXPECT_EQ(string("Second Subreads BAM"), ext.Name());
+        EXPECT_EQ(string("Points to another example Subreads BAM file."), ext.Description());
+        EXPECT_EQ(string("SubreadFile.SubreadBamFile"), ext.MetaType());
+        EXPECT_EQ(string("file:///mnt/path/to/subreads3.bam"), ext.ResourceId());
+        EXPECT_EQ(string("Example"), ext.Tags());
+
+        const FileIndices& indices = ext.FileIndices();
+        ASSERT_EQ(1, indices.Size());
+        const FileIndex& idx = indices[0];
+        EXPECT_EQ(string("file:///mnt/path/to/subreads3.pbi"), idx.ResourceId());
+    }
+
+    // exactly two filters, each holding one property
+    const Filters& dsFilters = ds.Filters();
+    ASSERT_EQ(2, dsFilters.Size());
+
+    {   // filter #0
+        const Filter& rule = dsFilters[0];
+        const Properties& props = rule.Properties();
+        ASSERT_EQ(1, props.Size());
+        const Property& prop = props[0];
+        EXPECT_EQ(string("rq"), prop.Name());
+        EXPECT_EQ(string("0.75"), prop.Value());
+        EXPECT_EQ(string(">"), prop.Operator());
+    }
+
+    {   // filter #1
+        const Filter& rule = dsFilters[1];
+        const Properties& props = rule.Properties();
+        ASSERT_EQ(1, props.Size());
+        const Property& prop = props[0];
+        EXPECT_EQ(string("QNAME"), prop.Name());
+        EXPECT_EQ(string("100/0/0_100"), prop.Value());
+        EXPECT_EQ(string("=="), prop.Operator());
+    }
+
+    // record count and total length are compared as strings
+    const DataSetMetadata& meta = ds.Metadata();
+    EXPECT_EQ(string("500"), meta.NumRecords());
+    EXPECT_EQ(string("500000"), meta.TotalLength());
+}
+
+// Loads subread3XmlFn and checks the parsed subread dataset: root attributes,
+// its two external BAM resources (one file index each), its two
+// single-property filters, and the dataset metadata counts.
+static void TestSubread3Xml(void)
+{
+    const DataSet ds(subread3XmlFn);
+
+    // root attributes
+    EXPECT_EQ(DataSet::SUBREAD, ds.Type());
+    EXPECT_EQ(string("2015-01-27T09:00:01"), ds.CreatedAt());
+    EXPECT_EQ(string("PacBio.DataSet.SubreadSet"), ds.MetaType());
+    EXPECT_EQ(string("DataSet_SubreadSet"), ds.Name());
+    EXPECT_EQ(string("barcode moreTags mapping mytags"), ds.Tags());
+    EXPECT_EQ(string("b095d0a3-94b8-4918-b3af-a3f81bbe519c"), ds.UniqueId());
+    EXPECT_EQ(string("2.3.0"), ds.Version());
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema-instance"), ds.Attribute("xmlns:xsi"));
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xsi:schemaLocation"));
+
+    // exactly two external resources; a wrong count aborts the helper
+    const ExternalResources& extResources = ds.ExternalResources();
+    ASSERT_EQ(2, extResources.Size());
+
+    {   // external resource #0
+        const ExternalResource& ext = extResources[0];
+        EXPECT_EQ(string("First Subreads BAM"), ext.Name());
+        EXPECT_EQ(string("Points to an example Subreads BAM file."), ext.Description());
+        EXPECT_EQ(string("SubreadFile.SubreadBamFile"), ext.MetaType());
+        EXPECT_EQ(string("file:///mnt/path/to/subreads2.bam"), ext.ResourceId());
+        EXPECT_EQ(string("Example"), ext.Tags());
+
+        const FileIndices& indices = ext.FileIndices();
+        ASSERT_EQ(1, indices.Size());
+        const FileIndex& idx = indices[0];
+        EXPECT_EQ(string("file:///mnt/path/to/subreads2.pbi"), idx.ResourceId());
+    }
+
+    {   // external resource #1
+        const ExternalResource& ext = extResources[1];
+        EXPECT_EQ(string("Second Subreads BAM"), ext.Name());
+        EXPECT_EQ(string("Points to another example Subreads BAM file."), ext.Description());
+        EXPECT_EQ(string("SubreadFile.SubreadBamFile"), ext.MetaType());
+        EXPECT_EQ(string("file:///mnt/path/to/subreads3.bam"), ext.ResourceId());
+        EXPECT_EQ(string("Example"), ext.Tags());
+
+        const FileIndices& indices = ext.FileIndices();
+        ASSERT_EQ(1, indices.Size());
+        const FileIndex& idx = indices[0];
+        EXPECT_EQ(string("file:///mnt/path/to/subreads3.pbi"), idx.ResourceId());
+    }
+
+    // exactly two filters, each holding one property
+    const Filters& dsFilters = ds.Filters();
+    ASSERT_EQ(2, dsFilters.Size());
+
+    {   // filter #0
+        const Filter& rule = dsFilters[0];
+        const Properties& props = rule.Properties();
+        ASSERT_EQ(1, props.Size());
+        const Property& prop = props[0];
+        EXPECT_EQ(string("rq"), prop.Name());
+        EXPECT_EQ(string("0.85"), prop.Value());
+        EXPECT_EQ(string(">"), prop.Operator());
+    }
+
+    {   // filter #1
+        const Filter& rule = dsFilters[1];
+        const Properties& props = rule.Properties();
+        ASSERT_EQ(1, props.Size());
+        const Property& prop = props[0];
+        EXPECT_EQ(string("QNAME"), prop.Name());
+        EXPECT_EQ(string("100/0/0_100"), prop.Value());
+        EXPECT_EQ(string("=="), prop.Operator());
+    }
+
+    // record count and total length are compared as strings
+    const DataSetMetadata& meta = ds.Metadata();
+    EXPECT_EQ(string("500"), meta.NumRecords());
+    EXPECT_EQ(string("500000"), meta.TotalLength());
+}
+
+// Loads transformedXmlFn and checks the parsed HDF subread dataset: root
+// attributes and namespace declarations, absence of filters and
+// sub-datasets, its three external bax.h5 resources, and the metadata counts.
+static void TestTransformedXml(void)
+{
+    const DataSet ds(transformedXmlFn);
+
+    // root attributes (CreatedAt is not checked for this file)
+    EXPECT_EQ(DataSet::HDF_SUBREAD, ds.Type());
+    EXPECT_EQ(string("PacBio.DataSet.SubreadSet"), ds.MetaType());
+    EXPECT_EQ(string("Subreads from run r001173_42129_130607"), ds.Name());
+    EXPECT_EQ(string("pacbio.secondary.instrument=RS"), ds.Tags());
+    EXPECT_EQ(string("abbc9183-b01e-4671-8c12-19efee534647"), ds.UniqueId());
+    EXPECT_EQ(string("0.5"), ds.Version());
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioDatasets.xsd"), ds.Attribute("xmlns"));
+    EXPECT_EQ(string("http://www.w3.org/2001/XMLSchema"), ds.Attribute("xmlns:xs"));
+    EXPECT_EQ(string("http://www.w3.org/2005/xpath-functions"), ds.Attribute("xmlns:fn"));
+    EXPECT_EQ(string("java:java.util.UUID"), ds.Attribute("xmlns:uuid"));
+    EXPECT_EQ(string("http://whatever"), ds.Attribute("xmlns:bax"));
+
+    // neither filters nor sub-datasets are expected
+    EXPECT_EQ(0, ds.Filters().Size());
+    EXPECT_EQ(0, ds.SubDataSets().Size());
+
+    // exactly three external resources; a wrong count aborts the helper
+    const ExternalResources& extResources = ds.ExternalResources();
+    ASSERT_EQ(3, extResources.Size());
+
+    {   // external resource #0
+        const ExternalResource& bax = extResources[0];
+        EXPECT_EQ(string("PacBio.SubreadFile.BaxFile"), bax.MetaType());
+        EXPECT_EQ(string("file:///mnt/secondary-siv/testdata/LIMS/2590727/0001/Analysis_Results/m130608_033634_42129_c100515232550000001823076608221351_s1_p0.0.bax.h5"),
+                  bax.ResourceId());
+    }
+
+    {   // external resource #1
+        const ExternalResource& bax = extResources[1];
+        EXPECT_EQ(string("PacBio.SubreadFile.BaxFile"), bax.MetaType());
+        EXPECT_EQ(string("file:///mnt/secondary-siv/testdata/LIMS/2590727/0001/Analysis_Results/m130608_033634_42129_c100515232550000001823076608221351_s1_p0.1.bax.h5"),
+                  bax.ResourceId());
+    }
+
+    {   // external resource #2
+        const ExternalResource& bax = extResources[2];
+        EXPECT_EQ(string("PacBio.SubreadFile.BaxFile"), bax.MetaType());
+        EXPECT_EQ(string("file:///mnt/secondary-siv/testdata/LIMS/2590727/0001/Analysis_Results/m130608_033634_42129_c100515232550000001823076608221351_s1_p0.2.bax.h5"),
+                  bax.ResourceId());
+    }
+
+    // record count and total length are compared as strings
+    const DataSetMetadata& meta = ds.Metadata();
+    EXPECT_EQ(string("150000"), meta.NumRecords());
+    EXPECT_EQ(string("50000000"), meta.TotalLength());
+}
+
+// Loads the dataset XML at <Data_Dir>/dataset/malformed.xml, writes it back out
+// to an in-memory stream, and requires the serialized text to match `expected`
+// exactly (attribute order, namespace prefixes, tabs and newlines included).
+//
+// NOTE(review): the expected ResourceId below contains the job directory twice
+// ("...h5_subreads_to_subread-0//mnt/..."); presumably this is what makes the
+// input "malformed" - confirm against malformed.xml.
+TEST(DataSetIOTest, InspectMalformedXml)
+{
+ const string xmlFn = tests::Data_Dir + "/dataset/malformed.xml";
+
+ // read from file, then serialize to a string stream
+ DataSet ds(xmlFn);
+ stringstream s;
+ ds.SaveToStream(s);
+
+ // exact text the writer is expected to emit for this input
+ const string expected =
+ "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
+ "<SubreadSet CreatedAt=\"2015-08-19T15:39:36.331\" Description=\"Merged dataset from 1 files using DatasetMerger 0.1.2\" "
+ "MetaType=\"PacBio.DataSet.HdfSubreadSet\" Name=\"Subreads from runr000013_42267_150403\" "
+ "Tags=\"pacbio.secondary.instrument=RS\" TimeStampedName=\"hdfsubreadset_2015-08-19T15:39:36.331-07:00\" "
+ "UniqueId=\"b4741521-2a4c-42df-8a13-0a755ca9ed1e\" Version=\"0.5\" "
+ "xmlns=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
+ "xmlns:ns0=\"http://pacificbiosciences.com/PacBioBaseDataModel.xsd\" "
+ "xmlns:ns1=\"http://pacificbiosciences.com/PacBioSampleInfo.xsd\" "
+ "xmlns:ns2=\"http://pacificbiosciences.com/PacBioCollectionMetadata.xsd\" "
+ "xmlns:ns3=\"http://pacificbiosciences.com/PacBioReagentKit.xsd\" "
+ "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
+ "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDatasets.xsd\">\n"
+ "\t<ns0:ExternalResources>\n"
+ "\t\t<ns0:ExternalResource MetaType=\"SubreadFile.SubreadBamFile\" "
+ "ResourceId=\"file:///mnt/secondary-siv/jenkins/jenkins-bot01/workspace/Ubuntu1404_Mainline_SA3_Tiny_tests/software/smrtanalysis/siv/testkit-jobs/sa3_pipelines/mapping/tiny/job_output-ubuntu1404/tasks/pbsmrtpipe.tasks.h5_subreads_to_subread-0//mnt/secondary-siv/jenkins/jenkins-bot01/workspace/Ubuntu1404_Mainline_SA3_Tiny_tests/software/smrtanalysis/siv/testkit-jobs/sa3_pipelines/mapping/tiny/job_output-ubuntu1404/tasks/pbsmrtpipe.tasks.h5_subreads_to_subread-0/file.subreads.subreads.bam\" "
+ "TimeStampedName=\"SubreadFile.SubreadBamFile_00000000000000\" "
+ "UniqueId=\"251acf71-9eb0-489e-9dd1-cdbd11432753\" />\n"
+ "\t</ns0:ExternalResources>\n"
+ "\t<DataSetMetadata>\n"
+ "\t\t<TotalLength>50000000</TotalLength>\n"
+ "\t\t<NumRecords>150000</NumRecords>\n"
+ "\t\t<ns2:Collections>\n"
+ "\t\t\t<ns2:CollectionMetadata Context=\"m150404_101626_42267_c100807920800000001823174110291514_s1_p0\" "
+ "InstrumentId=\"1\" InstrumentName=\"42267\" MetaType=\"PacBio.Collection\" "
+ "TimeStampedName=\"m150404_101626_42267_c100807920800000001823174110291514_s1_p0\" "
+ "UniqueId=\"d66c8372-2b70-4dcf-b64f-9f8b5cc351fd\">\n"
+ "\t\t\t\t<ns2:InstCtrlVer>2.3.0.1.142990</ns2:InstCtrlVer>\n"
+ "\t\t\t\t<ns2:SigProcVer>NRT@172.31.128.10:8082, SwVer=2301.142990, HwVer=1.0</ns2:SigProcVer>\n"
+ "\t\t\t\t<ns2:RunDetails>\n"
+ "\t\t\t\t\t<ns2:RunId>r000013_42267_150403</ns2:RunId>\n"
+ "\t\t\t\t\t<ns2:Name>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:Name>\n"
+ "\t\t\t\t</ns2:RunDetails>\n"
+ "\t\t\t\t<ns2:WellSample Name=\"Inst42267-040315-SAT-100pM-2kb-P6C4\">\n"
+ "\t\t\t\t\t<ns2:PlateId>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:PlateId>\n"
+ "\t\t\t\t\t<ns2:WellName>Inst42267-040315-SAT-100pM-2kb-P6C4</ns2:WellName>\n"
+ "\t\t\t\t\t<ns2:Concentration>0.0</ns2:Concentration>\n"
+ "\t\t\t\t\t<ns2:SampleReuseEnabled>false</ns2:SampleReuseEnabled>\n"
+ "\t\t\t\t\t<ns2:StageHotstartEnabled>false</ns2:StageHotstartEnabled>\n"
+ "\t\t\t\t\t<ns2:SizeSelectionEnabled>false</ns2:SizeSelectionEnabled>\n"
+ "\t\t\t\t\t<ns2:UseCount>1</ns2:UseCount>\n"
+ "\t\t\t\t\t<ns1:BioSamplePointers>\n"
+ "\t\t\t\t\t\t<ns1:BioSamplePointer>251acf71-9eb0-489e-9dd1-cdbd11432752</ns1:BioSamplePointer>\n"
+ "\t\t\t\t\t</ns1:BioSamplePointers>\n"
+ "\t\t\t\t</ns2:WellSample>\n"
+ "\t\t\t\t<ns2:Automation>\n"
+ "\t\t\t\t\t<ns0:AutomationParameters>\n"
+ "\t\t\t\t\t\t<ns0:AutomationParameter />\n"
+ "\t\t\t\t\t</ns0:AutomationParameters>\n"
+ "\t\t\t\t</ns2:Automation>\n"
+ "\t\t\t\t<ns2:CollectionNumber>7</ns2:CollectionNumber>\n"
+ "\t\t\t\t<ns2:CellIndex>4</ns2:CellIndex>\n"
+ "\t\t\t\t<ns2:CellPac Barcode=\"10080792080000000182317411029151\" />\n"
+ "\t\t\t\t<ns2:Primary>\n"
+ "\t\t\t\t\t<ns2:AutomationName>BasecallerV1</ns2:AutomationName>\n"
+ "\t\t\t\t\t<ns2:ConfigFileName>2-3-0_P6-C4.xml</ns2:ConfigFileName>\n"
+ "\t\t\t\t\t<ns2:SequencingCondition />\n"
+ "\t\t\t\t\t<ns2:OutputOptions>\n"
+ "\t\t\t\t\t\t<ns2:ResultsFolder>Analysis_Results</ns2:ResultsFolder>\n"
+ "\t\t\t\t\t\t<ns2:CollectionPathUri>rsy://mp-rsync/vol55//RS_DATA_STAGING/42267/Inst42267-040315-SAT-100pM-2kb-P6C4_13/A04_7/</ns2:CollectionPathUri>\n"
+ "\t\t\t\t\t\t<ns2:CopyFiles>\n"
+ "\t\t\t\t\t\t\t<ns2:CollectionFileCopy>Fasta</ns2:CollectionFileCopy>\n"
+ "\t\t\t\t\t\t</ns2:CopyFiles>\n"
+ "\t\t\t\t\t\t<ns2:Readout>Bases</ns2:Readout>\n"
+ "\t\t\t\t\t\t<ns2:MetricsVerbosity>Minimal</ns2:MetricsVerbosity>\n"
+ "\t\t\t\t\t</ns2:OutputOptions>\n"
+ "\t\t\t\t</ns2:Primary>\n"
+ "\t\t\t</ns2:CollectionMetadata>\n"
+ "\t\t</ns2:Collections>\n"
+ "\t\t<ns1:BioSamples>\n"
+ "\t\t\t<ns1:BioSample Description=\"Inst42267-SAT-100pM-2kbLambda-P6C4-Std120_CPS_040315\" "
+ "MetaType=\"PacBio.Sample\" Name=\"Inst42267-040315-SAT-100pM-2kb-P6C4\" "
+ "TimeStampedName=\"biosample_2015-08-19T15:39:36.331-07:00\" UniqueId=\"251acf71-9eb0-489e-9dd1-cdbd11432752\" />\n"
+ "\t\t</ns1:BioSamples>\n"
+ "\t</DataSetMetadata>\n"
+ "</SubreadSet>\n";
+
+ // serialized output must match character-for-character
+ EXPECT_EQ(expected, s.str());
+}
+
+// Round-trips an in-memory AlignmentSet whose resources (and their index files)
+// use relative ResourceIds. The XML written back out must be identical to the
+// XML that was parsed, i.e. the relative paths must not be rewritten.
+TEST(DataSetIOTest, RelativePathCarriedThroughOk_FromString)
+{
+    const string xmlText =
+        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
+        "<pbds:AlignmentSet "
+            "CreatedAt=\"2015-01-27T09:00:01\" "
+            "MetaType=\"PacBio.DataSet.AlignmentSet\" "
+            "Name=\"DataSet_AlignmentSet\" "
+            "Tags=\"barcode moreTags mapping mytags\" "
+            "TimeStampedName=\"biosample_2015-08-19T15:39:36.331-07:00\" "
+            "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" "
+            "Version=\"2.3.0\" "
+            "xmlns=\"http://pacificbiosciences.com/PacBioDataModel.xsd\" "
+            "xmlns:pbds=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
+            "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
+            "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDataModel.xsd\">\n"
+        "\t<pbbase:ExternalResources>\n"
+        "\t\t<pbbase:ExternalResource "
+            "Description=\"Points to an example Alignments BAM file.\" "
+            "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+            "Name=\"Third Alignments BAM\" "
+            "ResourceId=\"../path/to/resource1.bam\" "
+            "Tags=\"Example\">\n"
+        "\t\t\t<pbbase:FileIndices>\n"
+        "\t\t\t\t<pbbase:FileIndex "
+            "MetaType=\"PacBio.Index.PacBioIndex\" "
+            "ResourceId=\"../path/to/resource1.bam.pbi\" />\n"
+        "\t\t\t</pbbase:FileIndices>\n"
+        "\t\t</pbbase:ExternalResource>\n"
+        "\t\t<pbbase:ExternalResource "
+            "Description=\"Points to another example Alignments BAM file, by relative path.\" "
+            "MetaType=\"AlignmentFile.AlignmentBamFile\" "
+            "Name=\"Fourth Alignments BAM\" "
+            "ResourceId=\"../path/to/resource2.bam\" "
+            "Tags=\"Example\">\n"
+        "\t\t\t<pbbase:FileIndices>\n"
+        "\t\t\t\t<pbbase:FileIndex "
+            "MetaType=\"PacBio.Index.PacBioIndex\" "
+            "ResourceId=\"../path/to/resource2.bam.pbi\" />\n"
+        "\t\t\t</pbbase:FileIndices>\n"
+        "\t\t</pbbase:ExternalResource>\n"
+        "\t</pbbase:ExternalResources>\n"
+        "</pbds:AlignmentSet>\n";
+
+    // parse, then immediately serialize again
+    auto parsed = DataSet::FromXml(xmlText);
+    stringstream roundTripped;
+    parsed.SaveToStream(roundTripped);
+
+    // nothing - in particular no ResourceId - may have changed on the way through
+    EXPECT_EQ(xmlText, roundTripped.str());
+}
+
+// Loads <Data_Dir>/relative/relative.xml and checks that its three relative
+// ResourceIds are reported unchanged, both right after loading and again after
+// a save-to-stream / re-parse cycle.
+TEST(DataSetIOTest, RelativePathCarriedThroughOk_FromFile)
+{
+    // resource IDs expected at indices 0..2
+    const string expectedIds[] = { "./a/test.bam", "./b/test1.bam", "./b/test2.bam" };
+
+    // as loaded from file
+    DataSet fromFile(tests::Data_Dir + "/relative/relative.xml");
+    auto loadedResources = fromFile.ExternalResources();
+    for (int i = 0; i < 3; ++i) {
+        EXPECT_EQ(expectedIds[i], loadedResources[i].ResourceId());
+    }
+
+    // write out, read back in
+    stringstream serialized;
+    fromFile.SaveToStream(serialized);
+    auto reparsed = DataSet::FromXml(serialized.str());
+
+    // as seen after the round trip
+    auto reparsedResources = reparsed.ExternalResources();
+    for (int i = 0; i < 3; ++i) {
+        EXPECT_EQ(expectedIds[i], reparsedResources[i].ResourceId());
+    }
+}
+
+// Verifies that a DataSet can be constructed from a BAM filename given relative
+// to the current working directory. The test temporarily changes the process
+// working directory to <Data_Dir>/dataset and opens "../phi29.bam" from there.
+TEST(DataSetIOTest, DataSetFromRelativeBamFilename)
+{
+    // Puts the working directory back when it goes out of scope. A fatal
+    // assertion (ASSERT_*) returns from the test body early; without this guard
+    // every test that runs afterwards would inherit the changed directory.
+    struct WorkingDirectoryGuard
+    {
+        explicit WorkingDirectoryGuard(const string& dir) : restoreTo_(dir) { }
+        ~WorkingDirectoryGuard(void) { changeCurrentDirectory(restoreTo_); }
+        const string restoreTo_;
+    };
+
+    // cache initial directory and move to location so we can test relative filename ok
+    const string startingDirectory = internal::FileUtils::CurrentWorkingDirectory();
+    const WorkingDirectoryGuard restoreOnExit(startingDirectory);
+
+    const string targetDirectory = tests::Data_Dir + "/dataset";
+    changeCurrentDirectory(targetDirectory);
+    ASSERT_EQ(targetDirectory, internal::FileUtils::CurrentWorkingDirectory());
+
+    // opening via the relative name must not throw and must yield exactly one BAM file
+    EXPECT_NO_THROW(
+    {
+        const string relativeBamFn = "../phi29.bam";
+        const DataSet ds(relativeBamFn);
+        const auto& files = ds.BamFiles();
+        EXPECT_EQ(1, files.size());
+    });
+
+    // working directory is restored by restoreOnExit
+}
+
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/dataset/DataSet.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Placeholder for helpers local to the DataSetMetadata tests. It currently
+// holds only a commented-out factory function, so nothing is defined here.
+namespace tests {
+
+//static inline
+//DataSet CreateDataSet(void)
+//{
+// DataSet d;
+// d.Name("foo");
+// return d;
+//}
+
+} // namespace tests
+
+// Placeholder so this test file contributes at least one (always passing) test.
+TEST(DataSetMetadataTest, DummyTest)
+{
+    SUCCEED();
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "TestData.h"
+#include <boost/any.hpp>
+#include <gtest/gtest.h>
+#include <pbbam/EntireFileQuery.h>
+#include <pbbam/GenomicIntervalQuery.h>
+#include <pbbam/ZmwQuery.h>
+#include <pbbam/ZmwGroupQuery.h>
+#include <pbbam/DataSet.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Input BAM files used by the query tests below. The "Copy" variants live in
+// the generated-data directory; the tests use them as a second file with a
+// different filename.
+const string alignedBamFn = tests::Data_Dir + "/aligned.bam";
+const string aligned2BamFn = tests::Data_Dir + "/aligned2.bam";
+const string alignedCopyBamFn = tests::GeneratedData_Dir + "/aligned.bam";
+const string aligned2CopyBamFn = tests::GeneratedData_Dir + "/aligned2.bam";
+
+// File-of-filenames plus the three BAM files used for the multi-file tests.
+// NOTE(review): group_fofn is built from tests::Generated_Dir whereas the copies
+// above use tests::GeneratedData_Dir - confirm both constants are intended.
+const string group_fofn = tests::Generated_Dir + "/group.fofn";
+const string group_file1 = tests::Data_Dir + "/group/test1.bam";
+const string group_file2 = tests::Data_Dir + "/group/test2.bam";
+const string group_file3 = tests::Data_Dir + "/group/test3.bam";
+
+// Full record names the tests accept for records coming from group_file1.
+const vector<string> group_file1_names =
+{
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/24962/0_427"
+};
+
+// Full record names the tests accept for records coming from group_file2.
+const vector<string> group_file2_names =
+{
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/2114_2531",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/2579_4055",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/4101_5571",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/5615_6237"
+};
+
+// Full record names the tests accept for records coming from group_file3.
+// Some names are listed twice (e.g. 45203/0_893, 49050/48_1132).
+const vector<string> group_file3_names =
+{
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/45203/0_893",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/45203/0_893",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/46835/3759_4005",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/46835/4052_4686",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/46835/4732_4869",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/47698/9482_9628",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/47698/9675_10333",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/47698/10378_10609",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49050/48_1132",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49194/0_798",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49194/845_1541",
+ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/49521/0_134"
+};
+
+// Returns true if `name` is equal to at least one entry of `group`,
+// false otherwise (including when `group` is empty).
+static inline
+bool InGroup(const string& name, const vector<string>& group)
+{
+    bool found = false;
+    for (auto it = group.cbegin(); !found && it != group.cend(); ++it)
+        found = (*it == name);
+    return found;
+}
+
+// Exercises EntireFileQuery over single-file, duplicate-file and multi-file
+// datasets (the latter built by hand and from a FOFN), checking record counts
+// and, for multi-file input, that files are read one after another.
+TEST(DataSetQueryTest, EntireFileQueryTest)
+{
+    // single file - reached three different ways, each must yield all 4 records
+    EXPECT_NO_THROW(
+    {
+        BamFile bamFile(alignedBamFn);
+
+        DataSet dataset;
+        dataset.ExternalResources().Add(bamFile);
+
+        // from DataSet object
+        int numRecords = 0;
+        EntireFileQuery fromDataSet(dataset);
+        for (const BamRecord& record : fromDataSet) {
+            (void)record;
+            ++numRecords;
+        }
+        EXPECT_EQ(4, numRecords);
+
+        // from BAM filename
+        numRecords = 0;
+        EntireFileQuery fromFilename(alignedBamFn);
+        for (const BamRecord& record : fromFilename) {
+            (void)record;
+            ++numRecords;
+        }
+        EXPECT_EQ(4, numRecords);
+
+        // from BamFile object
+        numRecords = 0;
+        EntireFileQuery fromBamFile(bamFile);
+        for (const BamRecord& record : fromBamFile) {
+            (void)record;
+            ++numRecords;
+        }
+        EXPECT_EQ(4, numRecords);
+    });
+
+    // duplicate file attempt - adding the same file twice must not double the records
+    EXPECT_NO_THROW(
+    {
+        BamFile bamFile(alignedBamFn);
+
+        DataSet dataset;
+        dataset.ExternalResources().Add(bamFile);
+        dataset.ExternalResources().Add(bamFile);
+
+        int numRecords = 0;
+        EntireFileQuery query(dataset);
+        for (const BamRecord& record : query) {
+            (void)record;
+            ++numRecords;
+        }
+        EXPECT_EQ(4, numRecords); // same as single
+    });
+
+    // true multi-file dataset
+    EXPECT_NO_THROW(
+    {
+        BamFile file1(group_file1); // 1 read
+        BamFile file2(group_file2); // 4 reads
+        BamFile file3(group_file3); // 13 reads
+
+        DataSet dataset;
+        dataset.ExternalResources().Add(file1);
+        dataset.ExternalResources().Add(file2);
+        dataset.ExternalResources().Add(file3);
+
+        int numRecords = 0;
+        EntireFileQuery query(dataset);
+        for (const BamRecord& record : query) {
+
+            // ensure sequential merge of files:
+            // record 0 -> file1, records 1-4 -> file2, everything after -> file3
+            const vector<string>& expectedNames = (numRecords == 0) ? group_file1_names
+                                                : (numRecords < 5)  ? group_file2_names
+                                                                    : group_file3_names;
+            EXPECT_TRUE(InGroup(record.FullName(), expectedNames));
+
+            ++numRecords;
+        }
+        EXPECT_EQ(18, numRecords);
+    });
+
+    // same as above, from FOFN
+    EXPECT_NO_THROW(
+    {
+        DataSet dataset(group_fofn);
+        EntireFileQuery query(dataset);
+
+        int numRecords = 0;
+        for (const BamRecord& record : query) {
+
+            // ensure sequential merge of files (same layout as the hand-built dataset)
+            const vector<string>& expectedNames = (numRecords == 0) ? group_file1_names
+                                                : (numRecords < 5)  ? group_file2_names
+                                                                    : group_file3_names;
+            EXPECT_TRUE(InGroup(record.FullName(), expectedNames));
+
+            ++numRecords;
+        }
+        EXPECT_EQ(18, numRecords);
+    });
+}
+
+// Exercises GenomicIntervalQuery against three dataset layouts: one BAM file,
+// the same BAM file added twice, and two distinct files with the same content.
+// Each section runs the same sequence on one query object: count records in an
+// initial interval, move the interval and re-query, request an unknown
+// reference (setting it must throw, iterating afterwards must yield nothing),
+// then return to a valid interval to confirm the query recovers.
+TEST(DataSetQueryTest, GenomicIntervalQueryTest)
+{
+ // reference name used for all valid intervals
+ const string rname = "lambda_NEB3011";
+
+ // single file
+ EXPECT_NO_THROW(
+ {
+ DataSet dataset(alignedBamFn); // from BAM filename
+
+ // count records
+ int count = 0;
+ GenomicInterval interval(rname, 5000, 6000);
+ GenomicIntervalQuery query(interval, dataset);
+ for (const BamRecord& record : query) {
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(2, count);
+
+ // adjust interval and pass back in
+ count = 0;
+ interval.Start(9000);
+ interval.Stop(9500);
+ query.Interval(interval);
+ for (const BamRecord& record : query) {
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(2, count);
+
+ // unknown ref - setting the interval is expected to throw
+ count = 0;
+ interval.Name("does not exist");
+ interval.Start(0);
+ interval.Stop(100);
+ EXPECT_THROW(query.Interval(interval), std::exception);
+ for (const BamRecord& record : query) { // iteration is still safe, just returns no data
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(0, count);
+
+ // adjust again - make sure we can read a real region after an invalid one
+ interval.Name(rname);
+ interval.Start(5000);
+ interval.Stop(6000);
+ query.Interval(interval);
+ count = 0;
+ for (const BamRecord& record : query) {
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(2, count);
+ });
+
+ // duplicate file - same BamFile added twice; counts must match the single-file case
+ EXPECT_NO_THROW(
+ {
+ BamFile bamFile(alignedBamFn);
+
+ DataSet dataset;
+ dataset.ExternalResources().Add(bamFile);
+ dataset.ExternalResources().Add(bamFile);
+
+ // count records & also ensure sorted merge
+ int count = 0;
+ int prevId = 0;
+ int prevPos = 0;
+
+ GenomicInterval interval(rname, 5000, 6000);
+ GenomicIntervalQuery query(interval, dataset);
+ for (const BamRecord& record : query) {
+
+ // reference ID and start position must never decrease from one record to the next
+ EXPECT_TRUE(record.ReferenceId() >= prevId);
+ EXPECT_TRUE(record.ReferenceStart() >= prevPos);
+
+ prevId = record.ReferenceId();
+ prevPos = record.ReferenceStart();
+ ++count;
+ }
+ EXPECT_EQ(2, count); // same as single file
+
+ // adjust interval and pass back in
+ count = 0;
+ interval.Start(9000);
+ interval.Stop(10000);
+ query.Interval(interval);
+ for (const BamRecord& record : query) {
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(2, count); // same as single file
+
+ // unknown ref - setting the interval is expected to throw
+ count = 0;
+ interval.Name("does not exist");
+ interval.Start(0);
+ interval.Stop(100);
+ EXPECT_THROW(query.Interval(interval), std::exception);
+ for (const BamRecord& record : query) { // iteration is still safe, just returns no data
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(0, count); // same as single file
+
+ // adjust again - make sure we can read a real region after an invalid one
+ interval.Name(rname);
+ interval.Start(5000);
+ interval.Stop(5300);
+ query.Interval(interval);
+ count = 0;
+ for (const BamRecord& record : query) {
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(2, count); // same as single file
+ });
+
+ // multi-file BAM (same record content for easy testing, but a different filename, i.e. ResourceId)
+ EXPECT_NO_THROW(
+ {
+ BamFile bamFile(alignedBamFn);
+ BamFile copyFile(alignedCopyBamFn);
+
+ DataSet dataset;
+ dataset.ExternalResources().Add(bamFile);
+ dataset.ExternalResources().Add(copyFile);
+
+ // count records & also ensure sorted merge
+ int count = 0;
+ int prevId = 0;
+ int prevPos = 0;
+
+ GenomicInterval interval(rname, 5000, 6000);
+ GenomicIntervalQuery query(interval, dataset);
+ for (const BamRecord& record : query) {
+
+ // reference ID and start position must never decrease from one record to the next
+ EXPECT_TRUE(record.ReferenceId() >= prevId);
+ EXPECT_TRUE(record.ReferenceStart() >= prevPos);
+
+ prevId = record.ReferenceId();
+ prevPos = record.ReferenceStart();
+ ++count;
+ }
+ EXPECT_EQ(4, count); // single file * 2
+
+ // adjust interval and pass back in
+ count = 0;
+ interval.Start(9000);
+ interval.Stop(10000);
+ query.Interval(interval);
+ for (const BamRecord& record : query) {
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(4, count); // single file * 2
+
+ // unknown ref - setting the interval is expected to throw
+ count = 0;
+ interval.Name("does not exist");
+ interval.Start(0);
+ interval.Stop(100);
+ EXPECT_THROW(query.Interval(interval), std::exception);
+ for (const BamRecord& record : query) { // iteration is still safe, just returns no data
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(0, count); // no data from either file
+
+ // adjust again - make sure we can read a real region after an invalid one
+ interval.Name(rname);
+ interval.Start(5000);
+ interval.Stop(5300);
+ query.Interval(interval);
+ count = 0;
+ for (const BamRecord& record : query) {
+ (void)record;
+ ++count;
+ }
+ EXPECT_EQ(4, count); // single file * 2
+ });
+}
+
+// TODO: implement me - placeholder that always passes until a QName query test is written
+TEST(DataSetQueryTest, QNameQueryTest)
+{
+    SUCCEED();
+}
+
+// Runs ZmwQuery with a two-ZMW whitelist over a single-file and a two-file
+// dataset. Every returned record must belong to a whitelisted hole number, and
+// the two-file dataset must return twice as many records as the single file.
+TEST(DataSetQueryTest, ZmwQueryTest)
+{
+    const std::vector<int32_t> whitelist = { 13473, 30983 };
+
+    // single file
+    EXPECT_NO_THROW(
+    {
+        BamFile bamFile(aligned2BamFn);
+        ASSERT_TRUE(bamFile.PacBioIndexExists());
+        DataSet dataset(bamFile);
+
+        ZmwQuery query(whitelist, dataset);
+        int numRecords = 0;
+        for (const BamRecord& record : query) {
+            const int32_t zmw = record.HoleNumber();
+            const bool isWhitelisted = (zmw == 13473 || zmw == 30983);
+            EXPECT_TRUE(isWhitelisted);
+            ++numRecords;
+        }
+        EXPECT_EQ(4, numRecords);
+    });
+
+    // multi-file
+    EXPECT_NO_THROW(
+    {
+        BamFile firstFile(aligned2BamFn);
+        BamFile secondFile(aligned2CopyBamFn);
+        ASSERT_TRUE(firstFile.PacBioIndexExists());
+        ASSERT_TRUE(secondFile.PacBioIndexExists());
+
+        DataSet dataset;
+        dataset.ExternalResources().Add(ExternalResource(firstFile));
+        dataset.ExternalResources().Add(ExternalResource(secondFile));
+
+        ZmwQuery query(whitelist, dataset);
+        int numRecords = 0;
+        for (const BamRecord& record : query) {
+            const auto zmw = record.HoleNumber();
+            const bool isWhitelisted = (zmw == 13473 || zmw == 30983);
+            EXPECT_TRUE(isWhitelisted);
+            ++numRecords;
+        }
+        EXPECT_EQ(8, numRecords);
+    });
+}
+
+// Runs ZmwGroupQuery with a two-ZMW whitelist over a single-file and a
+// two-file dataset. Within each returned group all records must share one
+// whitelisted hole number; the multi-file case additionally expects exactly
+// two groups of four records each.
+TEST(DataSetQueryTest, ZmwGroupQueryTest)
+{
+    const std::vector<int32_t> whitelist = { 13473, 30983 };
+
+    // single-file
+    EXPECT_NO_THROW(
+    {
+        BamFile bamFile(aligned2BamFn);
+        ASSERT_TRUE(bamFile.PacBioIndexExists());
+        DataSet dataset(bamFile);
+
+        int numRecords = 0;
+        ZmwGroupQuery query(whitelist, dataset);
+        for (const vector<BamRecord>& zmwGroup : query) {
+
+            // -1 until the first record of this group has been seen
+            int32_t groupZmw = -1;
+            for (const BamRecord& record : zmwGroup) {
+                const auto holeNumber = record.HoleNumber();
+                if (groupZmw == -1)
+                    groupZmw = holeNumber;
+                EXPECT_TRUE(holeNumber == 13473 || holeNumber == 30983);
+                EXPECT_EQ(groupZmw, holeNumber);
+                ++numRecords;
+            }
+        }
+        EXPECT_EQ(4, numRecords);
+    });
+
+    // multi-file
+    EXPECT_NO_THROW(
+    {
+        BamFile firstFile(aligned2BamFn);
+        BamFile secondFile(aligned2CopyBamFn);
+        ASSERT_TRUE(firstFile.PacBioIndexExists());
+        ASSERT_TRUE(secondFile.PacBioIndexExists());
+
+        DataSet dataset;
+        dataset.ExternalResources().Add(ExternalResource(firstFile));
+        dataset.ExternalResources().Add(ExternalResource(secondFile));
+
+        int totalRecords = 0;
+        int numGroups = 0;
+        ZmwGroupQuery query(whitelist, dataset);
+        for (const vector<BamRecord>& zmwGroup : query) {
+
+            // per-group state: record tally and the ZMW of the group's first record
+            int recordsInGroup = 0;
+            int32_t groupZmw = -1;
+            for (const BamRecord& record : zmwGroup) {
+                const auto holeNumber = record.HoleNumber();
+                ++recordsInGroup;
+                if (groupZmw == -1)
+                    groupZmw = holeNumber;
+                EXPECT_TRUE(holeNumber == 13473 || holeNumber == 30983);
+                EXPECT_EQ(groupZmw, holeNumber);
+                ++totalRecords;
+            }
+
+            // the first two groups hold 4 records each; any further group is an error
+            if (numGroups < 2) {
+                EXPECT_EQ(4, recordsInGroup);
+            } else {
+                EXPECT_TRUE(false); // should not get here
+            }
+            ++numGroups;
+        }
+        EXPECT_EQ(8, totalRecords);
+    });
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/DataSet.h>
+#include <pbbam/DataSetXsd.h>
+#include <string>
+#include <sstream>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Checks the contents of a default-constructed NamespaceRegistry: DATASETS is
+// the default XSD, and the base-data-model, datasets and default namespaces
+// carry the expected prefix and URI.
+TEST(DataSetXsdTest, DefaultsOk)
+{
+    NamespaceRegistry registry;
+
+    // which XSD is the default
+    EXPECT_EQ(XsdType::DATASETS, registry.DefaultXsd());
+
+    // prefixes
+    const NamespaceInfo& baseNs    = registry.Namespace(XsdType::BASE_DATA_MODEL);
+    const NamespaceInfo& datasetNs = registry.Namespace(XsdType::DATASETS);
+    const NamespaceInfo& defaultNs = registry.DefaultNamespace();
+    EXPECT_EQ(string("pbds"),   datasetNs.Name());
+    EXPECT_EQ(string("pbbase"), baseNs.Name());
+    EXPECT_EQ(string("pbds"),   defaultNs.Name());
+
+    // URIs
+    const string baseUri     = "http://pacificbiosciences.com/PacBioBaseDataModel.xsd";
+    const string datasetsUri = "http://pacificbiosciences.com/PacBioDatasets.xsd";
+    EXPECT_EQ(baseUri,     baseNs.Uri());
+    EXPECT_EQ(datasetsUri, datasetNs.Uri());
+    EXPECT_EQ(datasetsUri, defaultNs.Uri());
+}
+
+// Verifies that NamespaceRegistry::SetDefaultXsd() really changes the default.
+//
+// A fresh registry already defaults to XsdType::DATASETS, so the test switches
+// to BASE_DATA_MODEL instead; selecting DATASETS again would pass even if
+// SetDefaultXsd() did nothing at all.
+TEST(DataSetXsdTest, EditDefaultOk)
+{
+    NamespaceRegistry registry;
+    EXPECT_EQ(XsdType::DATASETS, registry.DefaultXsd()); // starting point
+
+    registry.SetDefaultXsd(XsdType::BASE_DATA_MODEL);
+
+    // the default namespace must now be the base data model's prefix/URI
+    const NamespaceInfo& defaultInfo = registry.DefaultNamespace();
+
+    EXPECT_EQ(XsdType::BASE_DATA_MODEL, registry.DefaultXsd());
+    EXPECT_EQ(string("pbbase"), defaultInfo.Name());
+    EXPECT_EQ(string("http://pacificbiosciences.com/PacBioBaseDataModel.xsd"), defaultInfo.Uri());
+}
+
+// Registers a custom prefix/URI for the DATASETS XSD and checks that a
+// subsequent lookup of that XSD returns the custom values.
+TEST(DataSetXsdTest, EditRegistryOk)
+{
+    const string customName = "custom";
+    const string customUri  = "http://custom/uri.xsd";
+
+    NamespaceRegistry registry;
+    registry.Register(XsdType::DATASETS, NamespaceInfo("custom", "http://custom/uri.xsd"));
+
+    const NamespaceInfo& registered = registry.Namespace(XsdType::DATASETS);
+    EXPECT_EQ(customName, registered.Name());
+    EXPECT_EQ(customUri,  registered.Uri());
+}
+
+// Builds an AlignmentSet with one external resource, replaces the
+// BASE_DATA_MODEL namespace in the dataset's own registry with a custom
+// prefix/URI, and checks the serialized XML: the resource elements must use the
+// "custom:" prefix and the root must declare xmlns:custom.
+TEST(DataSetXsdTest, EditDatasetRegistry)
+{
+    // root element attributes
+    DataSet dataset(DataSet::ALIGNMENT);
+    dataset.CreatedAt("2015-01-27T09:00:01");
+    dataset.MetaType("PacBio.DataSet.AlignmentSet");
+    dataset.Name("DataSet_AlignmentSet");
+    dataset.Tags("barcode moreTags mapping mytags");
+    dataset.TimeStampedName("my_time_stamped_name");
+    dataset.UniqueId("b095d0a3-94b8-4918-b3af-a3f81bbe519c");
+    dataset.Attribute("xmlns", "http://pacificbiosciences.com/PacBioDatasets.xsd");
+    dataset.Attribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
+    dataset.Attribute("xsi:schemaLocation", "http://pacificbiosciences.com/PacBioDatasets.xsd");
+
+    // single external resource with fixed identifiers, so the output is deterministic
+    ExternalResource resource("Fake.MetaType", "filename");
+    resource.TimeStampedName("custom_tsn");
+    resource.UniqueId("my_uuid");
+    dataset.ExternalResources().Add(resource);
+
+    // swap in the custom namespace for base-data-model elements
+    dataset.Namespaces().Register(XsdType::BASE_DATA_MODEL, NamespaceInfo("custom", "http://custom/uri.xsd"));
+
+    const string expectedXml =
+        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
+        "<pbds:AlignmentSet CreatedAt=\"2015-01-27T09:00:01\" MetaType=\"PacBio.DataSet.AlignmentSet\" "
+            "Name=\"DataSet_AlignmentSet\" Tags=\"barcode moreTags mapping mytags\" "
+            "TimeStampedName=\"my_time_stamped_name\" "
+            "UniqueId=\"b095d0a3-94b8-4918-b3af-a3f81bbe519c\" Version=\"3.0.1\" "
+            "xmlns=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
+            "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
+            "xsi:schemaLocation=\"http://pacificbiosciences.com/PacBioDatasets.xsd\" "
+            "xmlns:custom=\"http://custom/uri.xsd\" "
+            "xmlns:pbds=\"http://pacificbiosciences.com/PacBioDatasets.xsd\">\n"
+        "\t<custom:ExternalResources>\n"
+        "\t\t<custom:ExternalResource MetaType=\"Fake.MetaType\" ResourceId=\"filename\" TimeStampedName=\"custom_tsn\" UniqueId=\"my_uuid\" Version=\"3.0.1\" />\n"
+        "\t</custom:ExternalResources>\n"
+        "</pbds:AlignmentSet>\n";
+
+    stringstream serialized;
+    dataset.SaveToStream(serialized);
+    EXPECT_EQ(expectedXml, serialized.str());
+}
+
+// Checks that elements added by bare name (no C++ wrapper class, no explicit
+// namespace prefix) are written with the prefix of the XSD they belong to -
+// first with the registry's stock namespaces, then with custom ones.
+TEST(DataSetXsdTest, ElementRegistryOk)
+{
+    { // default namespaces
+
+        DataSet ds;
+
+        // append child elements that do not have a C++ built-in, nor namespace prefix with addition
+        DataSetMetadata& metadata = ds.Metadata();
+        metadata.AddChild(internal::DataSetElement("SummaryStats"));
+        metadata.AddChild(internal::DataSetElement("CopyFiles"));
+        metadata.AddChild(internal::DataSetElement("BioSamples"));
+        metadata.AddChild(internal::DataSetElement("AutomationParameters"));
+
+        stringstream s;
+        ds.SaveToStream(s);
+        const string output = s.str();
+
+        // check that default namespace is propagated properly
+        EXPECT_TRUE(output.find("pbds:SummaryStats") != string::npos);
+        EXPECT_TRUE(output.find("pbmeta:CopyFiles") != string::npos);
+        EXPECT_TRUE(output.find("pbsample:BioSamples") != string::npos);
+        EXPECT_TRUE(output.find("pbbase:AutomationParameters") != string::npos);
+    }
+
+    { // custom namespaces
+
+        DataSet ds;
+
+        // setup custom namespaces - one distinct prefix AND one distinct URI per XSD,
+        // so no two XSD types end up sharing a URI
+        ds.Namespaces().Register(XsdType::BASE_DATA_MODEL, NamespaceInfo("custom_base", "http://custom/base.xsd"));
+        ds.Namespaces().Register(XsdType::COLLECTION_METADATA, NamespaceInfo("custom_meta", "http://custom/meta.xsd"));
+        ds.Namespaces().Register(XsdType::DATASETS, NamespaceInfo("custom_ds", "http://custom/datasets.xsd"));
+        ds.Namespaces().Register(XsdType::SAMPLE_INFO, NamespaceInfo("custom_sample", "http://custom/sample.xsd"));
+
+        // append child elements that do not have a C++ built-in, nor namespace prefix with addition
+        DataSetMetadata& metadata = ds.Metadata();
+        metadata.AddChild(internal::DataSetElement("SummaryStats"));
+        metadata.AddChild(internal::DataSetElement("CopyFiles"));
+        metadata.AddChild(internal::DataSetElement("BioSamples"));
+        metadata.AddChild(internal::DataSetElement("AutomationParameters"));
+
+        stringstream s;
+        ds.SaveToStream(s);
+        const string output = s.str();
+
+        // check that custom namespace is propagated properly
+        EXPECT_TRUE(output.find("custom_ds:SummaryStats") != string::npos);
+        EXPECT_TRUE(output.find("custom_meta:CopyFiles") != string::npos);
+        EXPECT_TRUE(output.find("custom_sample:BioSamples") != string::npos);
+        EXPECT_TRUE(output.find("custom_base:AutomationParameters") != string::npos);
+    }
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#define protected public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <htslib/sam.h>
+#include <pbbam/BamFile.h>
+#include <pbbam/BamWriter.h>
+#include <pbbam/EntireFileQuery.h>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <cstdio>
+#include <cstdlib>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::tests;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
+// Deleter functor for std::unique_ptr<bam1_t, Bam1Deleter>.
+// Releases the record with bam_destroy1(); a null pointer is ignored.
+struct Bam1Deleter
+{
+    void operator()(bam1_t* b) const {
+        // 'b' is a by-value copy of the pointer, so there is nothing to gain
+        // from resetting it after the record has been destroyed
+        if (b)
+            bam_destroy1(b);
+    }
+};
+
+// Deleter functor for std::unique_ptr<samFile, SamFileDeleter>.
+// Closes the file handle with sam_close(); a null pointer is ignored.
+struct SamFileDeleter
+{
+    void operator()(samFile* file) const {
+        // 'file' is a by-value copy of the pointer, so there is nothing to
+        // gain from resetting it after the handle has been closed
+        if (file)
+            sam_close(file);
+    }
+};
+
+// Deleter functor for std::unique_ptr<bam_hdr_t, BamHdrDeleter>.
+// Releases the header with bam_hdr_destroy(); a null pointer is ignored.
+struct BamHdrDeleter
+{
+    void operator()(bam_hdr_t* hdr) const {
+        // 'hdr' is a by-value copy of the pointer, so there is nothing to
+        // gain from resetting it after the header has been destroyed
+        if (hdr)
+            bam_hdr_destroy(hdr);
+    }
+};
+
+// input BAM and the SAM file the generated output is diffed against
+const string inputBamFn(tests::Data_Dir + "/aligned.bam");
+const string goldStandardSamFn(tests::Data_Dir + "/aligned.sam");
+
+// scratch files written under tests::GeneratedData_Dir
+const string generatedBamFn(tests::GeneratedData_Dir + "/generated.bam");
+const string generatedSamFn(tests::GeneratedData_Dir + "/generated.sam");
+
+// the scratch files, collected so they can be deleted in one call
+const vector<string> generatedFiles{ generatedBamFn, generatedSamFn };
+
+// Runs the tests::Bam2Sam command on 'bamFn' (preceded by any extra 'args'),
+// redirecting its standard output into 'samFn'.
+// Returns the value reported by system().
+static inline
+int RunBam2Sam(const string& bamFn,
+               const string& samFn,
+               const string& args = string())
+{
+    stringstream command;
+    command << tests::Bam2Sam << " " << args << " ";
+    command << bamFn << " > " << samFn;
+
+    const string commandLine = command.str();
+    return system(commandLine.c_str());
+}
+
+// Runs the external 'diff' command on the two files.
+// Returns the value reported by system().
+static inline
+int RunDiff(const string& fn1, const string& fn2)
+{
+    const string commandLine = "diff " + fn1 + " " + fn2;
+    return system(commandLine.c_str());
+}
+
+// Deletes every file named in 'files'. The result of each remove() call is
+// ignored, so files that do not exist are silently skipped.
+static inline
+void Remove(const vector<string>& files)
+{
+    for (auto it = files.begin(); it != files.end(); ++it)
+        remove(it->c_str());
+}
+
+// Verifies the BAM file a test just wrote: converts it to SAM, diffs that
+// against the gold standard SAM, records both exit codes as expectations, and
+// finally deletes the generated files.
+static inline
+void CheckGeneratedOutput(void)
+{
+    // the diff is attempted even if the conversion failed; both results are reported
+    const int bam2samStatus = RunBam2Sam(generatedBamFn, generatedSamFn);
+    const int diffStatus = RunDiff(goldStandardSamFn, generatedSamFn);
+
+    EXPECT_EQ(0, bam2samStatus);
+    EXPECT_EQ(0, diffStatus);
+
+    // leave nothing behind for the next test
+    Remove(generatedFiles);
+}
+
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
+// sanity check for rest of tests below
+//
+// Copies the input BAM record-by-record using only the raw htslib C API, then
+// checks the copy against the gold standard via CheckGeneratedOutput().
+TEST(EndToEndTest, ReadAndWrite_PureHtslib)
+{
+    { // scoped to force flush & close before conversion/diff
+
+        // open files
+
+        unique_ptr<samFile, SamFileDeleter> inWrapper(sam_open(inputBamFn.c_str(), "r"));
+        samFile* in = inWrapper.get();
+        ASSERT_TRUE(in);
+
+        unique_ptr<samFile, SamFileDeleter> outWrapper(sam_open(generatedBamFn.c_str(), "wb"));
+        samFile* out = outWrapper.get();
+        ASSERT_TRUE(out);
+
+        // fetch & write header
+
+        unique_ptr<bam_hdr_t, BamHdrDeleter> headerWrapper(sam_hdr_read(in));
+        bam_hdr_t* hdr = headerWrapper.get();
+        ASSERT_TRUE(hdr);
+        ASSERT_EQ(0, sam_hdr_write(out, hdr));
+
+        // fetch & write records
+
+        unique_ptr<bam1_t, Bam1Deleter> record(bam_init1());
+        bam1_t* b = record.get();
+        ASSERT_TRUE(b);
+
+        int readResult = 0;
+        while ((readResult = sam_read1(in, hdr, b)) >= 0) {
+            // report a failed write here, rather than as a confusing diff
+            // mismatch later on
+            ASSERT_GE(sam_write1(out, hdr, b), 0);
+        }
+
+        // sam_read1() signals end-of-stream with -1; anything lower is a
+        // read error and means the copy is incomplete
+        EXPECT_EQ(-1, readResult);
+    }
+
+    CheckGeneratedOutput();
+}
+
+// Copies the input BAM through the pbbam API, passing 1 as the writer's last
+// constructor argument (the thread count, going by the test name), then
+// checks the copy against the gold standard.
+TEST(EndToEndTest, ReadAndWrite_SingleThread)
+{
+    EXPECT_NO_THROW(
+    {
+        // source
+        BamFile input(tests::inputBamFn);
+
+        // destination
+        BamWriter output(tests::generatedBamFn,
+                         input.Header(),
+                         BamWriter::DefaultCompression,
+                         1);
+
+        // stream every record from source to destination
+        EntireFileQuery allRecords(input);
+        for (const BamRecord& rec : allRecords)
+            output.Write(rec);
+    });
+
+    CheckGeneratedOutput();
+}
+
+// Copies the input BAM through the pbbam API, leaving the writer's optional
+// constructor arguments at their defaults, then checks the copy against the
+// gold standard.
+TEST(EndToEndTest, ReadAndWrite_APIDefaultThreadCount)
+{
+    EXPECT_NO_THROW(
+    {
+        // source
+        BamFile input(inputBamFn);
+
+        // destination (defaults for everything but file name & header)
+        BamWriter output(generatedBamFn, input.Header());
+
+        // stream every record from source to destination
+        EntireFileQuery allRecords(input);
+        for (const BamRecord& rec : allRecords)
+            output.Write(rec);
+    });
+
+    CheckGeneratedOutput();
+}
+
+// Copies the input BAM through the pbbam API, passing 0 as the writer's last
+// constructor argument (presumably "let the system pick the thread count",
+// going by the test name), then checks the copy against the gold standard.
+TEST(EndToEndTest, ReadAndWrite_SystemDefaultThreadCount)
+{
+    EXPECT_NO_THROW(
+    {
+        // source
+        BamFile input(inputBamFn);
+
+        // destination
+        BamWriter output(generatedBamFn, input.Header(), BamWriter::DefaultCompression, 0);
+
+        // stream every record from source to destination
+        EntireFileQuery allRecords(input);
+        for (const BamRecord& rec : allRecords)
+            output.Write(rec);
+    });
+
+    CheckGeneratedOutput();
+}
+
+// Copies the input BAM through the pbbam API, passing 3 as the writer's last
+// constructor argument (a caller-chosen thread count, going by the test
+// name), then checks the copy against the gold standard.
+TEST(EndToEndTest, ReadAndWrite_UserThreadCount)
+{
+    EXPECT_NO_THROW(
+    {
+        // source
+        BamFile input(inputBamFn);
+
+        // destination
+        BamWriter output(generatedBamFn, input.Header(), BamWriter::DefaultCompression, 3);
+
+        // stream every record from source to destination
+        EntireFileQuery allRecords(input);
+        for (const BamRecord& rec : allRecords)
+            output.Write(rec);
+    });
+
+    CheckGeneratedOutput();
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/EntireFileQuery.h>
+#include <pbbam/BamWriter.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// BAM file read by the EntireFileQuery tests below
+const string inputBamFn(tests::Data_Dir + "/aligned.bam");
+
+// Iterating the whole input file by const reference must not throw and must
+// visit exactly 4 records.
+TEST(EntireFileQueryTest, CountRecords)
+{
+    EXPECT_NO_THROW(
+    {
+        BamFile input(inputBamFn);
+        EntireFileQuery allRecords(input);
+
+        int numRecords = 0;
+        for (const BamRecord& rec : allRecords) {
+            (void)rec; // only the number of iterations matters
+            numRecords += 1;
+        }
+
+        EXPECT_EQ(4, numRecords);
+    });
+}
+
+// Same as CountRecords, but binds each record to a non-const reference to
+// make sure that form of iteration compiles and behaves identically.
+TEST(EntireFileQueryTest, NonConstBamRecord)
+{
+    EXPECT_NO_THROW(
+    {
+        BamFile input(inputBamFn);
+        EntireFileQuery allRecords(input);
+
+        int numRecords = 0;
+        for (BamRecord& rec : allRecords) {
+            (void)rec; // only the number of iterations matters
+            numRecords += 1;
+        }
+
+        EXPECT_EQ(4, numRecords);
+    });
+}
+
+TEST(BamRecordTest, HandlesDeletionOK)
+{
+    // this file raised no error in Debug mode, but segfaulted when
+    // trying to access the aligned qualities in Release mode
+
+    // expected FASTQ strings; they do not depend on the record, so they are
+    // built once up front (written in runs of 10 to keep the length checkable)
+
+    // raw: 99 x 'I'
+    const string expectedRaw =
+        "IIIIIIIIII" "IIIIIIIIII" "IIIIIIIIII" "IIIIIIIIII" "IIIIIIIIII"
+        "IIIIIIIIII" "IIIIIIIIII" "IIIIIIIIII" "IIIIIIIIII" "IIIIIIIII";
+
+    // aligned (1=1D98=): 'I', then '!' at the deleted position, then 98 x 'I'
+    const string expectedAligned =
+        "I!"
+        "IIIIIIIIII" "IIIIIIIIII" "IIIIIIIIII" "IIIIIIIIII" "IIIIIIIIII"
+        "IIIIIIIIII" "IIIIIIIIII" "IIIIIIIIII" "IIIIIIIIII" "IIIIIIII";
+
+    const string problemBamFn = tests::Data_Dir + "/segfault.bam";
+    BamFile problemBam(problemBamFn);
+
+    int numRecords = 0;
+    EntireFileQuery allRecords(problemBam);
+    for (const BamRecord& rec : allRecords) {
+
+        const auto rawQuals = rec.Qualities(Orientation::GENOMIC, false);
+        const auto alignedQuals = rec.Qualities(Orientation::GENOMIC, true);
+
+        EXPECT_EQ(expectedRaw, rawQuals.Fastq());
+        EXPECT_EQ(expectedAligned, alignedQuals.Fastq());
+
+        ++numRecords;
+    }
+
+    EXPECT_EQ(1, numRecords);
+}
+
+
+// ReferenceName() returns the reference for a mapped record and throws
+// std::runtime_error for an unmapped one.
+TEST(BamRecordTest, ReferenceName)
+{
+    { // mapped: first record of aligned.bam
+        const string mappedBamFn = tests::Data_Dir + "/aligned.bam";
+        BamFile mappedBam(mappedBamFn);
+        EntireFileQuery query(mappedBam);
+        auto it = query.begin();
+        auto& rec = *it;
+        ASSERT_TRUE(rec.IsMapped());
+        EXPECT_EQ("lambda_NEB3011", rec.ReferenceName());
+    }
+
+    { // unmapped: first record of unmap1.bam has no reference name, should throw
+        const string unmappedBamFn = tests::Data_Dir + "/unmap1.bam";
+        BamFile unmappedBam(unmappedBamFn);
+        EntireFileQuery query(unmappedBam);
+        auto it = query.begin();
+        auto& rec = *it;
+        ASSERT_FALSE(rec.IsMapped());
+        EXPECT_THROW(rec.ReferenceName(), std::runtime_error);
+    }
+}
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/FastaReader.h>
+#include <pbbam/FastaSequence.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Checks the name and bases of the FASTA record found at position 'index'
+// (0-based) of the 'normal.fa' fixture. Any index other than 0, 1 or 2 is
+// reported as a fatal failure. Failures are tagged with the index through
+// SCOPED_TRACE.
+static void CheckSequence(const size_t index, const FastaSequence& seq)
+{
+    SCOPED_TRACE("checking FASTA seq:" + std::to_string(index));
+    switch (index) {
+        case 0 :
+            EXPECT_EQ("1", seq.Name());
+            EXPECT_EQ("TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACAACGCAGCTCCGCCCTCGCGGTGCTCTCCGGGTCTGTGCTGAGGAGAACGCAACTCCGCCGGCGCAGGCG", seq.Bases());
+            break;
+
+        case 1 :
+            EXPECT_EQ("2", seq.Name());
+            EXPECT_EQ("TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACAACGCAGCTCCGCCCTCGCGGTGCTCTCCGGGTCTGTGCTGAGGAGAACGCAAC", seq.Bases());
+            break;
+
+        case 2 :
+            EXPECT_EQ("3", seq.Name());
+            EXPECT_EQ("TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACACCCTAACCCCAACCCCAACCCCAACCCCAACCCCAACCCCAACCCTAACCCCTAACCCTAACCCT", seq.Bases());
+            break;
+
+        default:
+            // more records than the fixture is supposed to contain
+            FAIL() << "invalid FASTA sequence index: " << index;
+    }
+}
+
+// A FastaSequence built from a name and bases must report both unchanged.
+TEST(FastaSequenceTest, BasicConstructorOk)
+{
+    const FastaSequence fasta{ "1", "GATTACA" };
+
+    EXPECT_EQ("1", fasta.Name());
+    EXPECT_EQ("GATTACA", fasta.Bases());
+}
+
+// Reads 'normal.fa' one record at a time with GetNext(), checking each record
+// against the expected fixture contents and the total against 3.
+TEST(FastaReaderTest, IterableOk)
+{
+    const string fn = tests::GeneratedData_Dir + "/normal.fa";
+    FastaReader reader{ fn };
+
+    size_t count = 0;
+    FastaSequence seq;
+    while (reader.GetNext(seq)) {
+        CheckSequence(count, seq);
+        ++count;
+    }
+
+    // unsigned literal: 'count' is a size_t, so a plain 3 would make
+    // EXPECT_EQ compare signed with unsigned
+    EXPECT_EQ(3u, count);
+}
+
+// Reads 'normal.fa' in one go with FastaReader::ReadAll(), checking each
+// record against the expected fixture contents and the total against 3.
+TEST(FastaReaderTest, ReadAllOk)
+{
+    const string fn = tests::GeneratedData_Dir + "/normal.fa";
+
+    size_t count = 0;
+    for (const auto& seq : FastaReader::ReadAll(fn)) {
+        CheckSequence(count, seq);
+        ++count;
+    }
+
+    // unsigned literal: 'count' is a size_t, so a plain 3 would make
+    // EXPECT_EQ compare signed with unsigned
+    EXPECT_EQ(3u, count);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/../../src/FileUtils.h>
+#include <pbbam/../../src/TimeUtils.h>
+
+#include <boost/algorithm/string.hpp>
+
+#include <chrono>
+#include <string>
+#include <vector>
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+
+#include <iostream>
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// FileUtils::Exists() is false for a path that was never created and true
+// for a file freshly created with 'touch'.
+TEST(FileUtilsTest, ExistsOk)
+{
+    // missing file
+    EXPECT_FALSE(FileUtils::Exists("does_not_exist.txt"));
+
+    // existing file: create it through the shell, then look for it
+    const string existingFn = tests::GeneratedData_Dir + "/pbbam_exists_check.tmp";
+    const string touchCmd = "touch " + existingFn;
+    ASSERT_EQ(0, system(touchCmd.c_str()));
+    EXPECT_TRUE(FileUtils::Exists(existingFn));
+}
+
+// A file created after taking the current time must report a last-modified
+// stamp that is not earlier than that time (compared at second resolution).
+TEST(FileUtilsTest, LastModifiedOk)
+{
+    // a little tricky to check without going a full 'mock' filesystem route, but we can approximate
+    //
+    // also, I can't seem to get better than second resolution (on OSX 10.9/clang at least, st_mtimespec.tv_nsec is always zero)
+
+    const auto now = CurrentTime();
+    const auto nowDuration = now.time_since_epoch();
+    const auto nowSeconds = chrono::duration_cast<chrono::seconds>(nowDuration).count();
+
+    const string tmp = tests::GeneratedData_Dir + "/pbbam_lastmod_check.tmp";
+    const string touchCmd = string("touch ") + tmp;
+
+    // start from a clean slate so 'touch' creates the file anew; the file may
+    // legitimately be absent, so the result is deliberately ignored
+    // (std::remove avoids spawning a shell and the "No such file" noise of 'rm')
+    (void)std::remove(tmp.c_str());
+    ASSERT_EQ(0, system(touchCmd.c_str()));
+
+    const auto stamp = FileUtils::LastModified(tmp);
+    const auto stampDuration = stamp.time_since_epoch();
+    const auto stampSeconds = chrono::duration_cast<chrono::seconds>(stampDuration).count();
+
+    EXPECT_LE(nowSeconds, stampSeconds);
+}
+
+// Exercises FileUtils::ResolvedFilePath() with absolute, relative and bare
+// file names - with and without a "file://" scheme - resolved against an
+// explicit 'from' directory as well as against the default one.
+TEST(FileUtilsTest, ResolvedFilePathOk)
+{
+    const string fromDir = "/path/to/myDir";
+
+    { // "raw" filenames - no URI scheme
+
+        const string absFn = "/absolute/path/to/file.txt";
+        const string relFn = "../relative/path/to/file.txt";
+        const string bareFn = "file.txt";
+
+        // explicit 'from' directory
+        EXPECT_EQ("/absolute/path/to/file.txt", FileUtils::ResolvedFilePath(absFn, fromDir));
+        EXPECT_EQ("/path/to/myDir/../relative/path/to/file.txt", FileUtils::ResolvedFilePath(relFn, fromDir));
+        EXPECT_EQ("/path/to/myDir/file.txt", FileUtils::ResolvedFilePath(bareFn, fromDir));
+
+        // default 'from' directory
+        EXPECT_EQ("/absolute/path/to/file.txt", FileUtils::ResolvedFilePath(absFn));
+        EXPECT_EQ("./../relative/path/to/file.txt", FileUtils::ResolvedFilePath(relFn));
+        EXPECT_EQ("./file.txt", FileUtils::ResolvedFilePath(bareFn));
+    }
+
+    { // filenames with URI scheme ("file://") - same expectations as above
+
+        const string absSchemeFn = "file:///absolute/path/to/file.txt";
+        const string relSchemeFn = "file://../relative/path/to/file.txt";
+        const string bareSchemeFn = "file://file.txt";
+
+        // explicit 'from' directory
+        EXPECT_EQ("/absolute/path/to/file.txt", FileUtils::ResolvedFilePath(absSchemeFn, fromDir));
+        EXPECT_EQ("/path/to/myDir/../relative/path/to/file.txt", FileUtils::ResolvedFilePath(relSchemeFn, fromDir));
+        EXPECT_EQ("/path/to/myDir/file.txt", FileUtils::ResolvedFilePath(bareSchemeFn, fromDir));
+
+        // default 'from' directory
+        EXPECT_EQ("/absolute/path/to/file.txt", FileUtils::ResolvedFilePath(absSchemeFn));
+        EXPECT_EQ("./../relative/path/to/file.txt", FileUtils::ResolvedFilePath(relSchemeFn));
+        EXPECT_EQ("./file.txt", FileUtils::ResolvedFilePath(bareSchemeFn));
+    }
+}
+
+// FileUtils::Size() reports 0 for a file freshly created with 'touch' and
+// throws std::runtime_error for a path that does not exist.
+TEST(FileUtilsTest, SizeOk)
+{
+    // empty file
+    const string emptyFn = tests::GeneratedData_Dir + "/pbbam_empty_file.tmp";
+    const string touchCmd = "touch " + emptyFn;
+    ASSERT_EQ(0, system(touchCmd.c_str()));
+    EXPECT_EQ(0, FileUtils::Size(emptyFn));
+
+    // missing file
+    EXPECT_THROW(FileUtils::Size("does_not_exist.txt"), std::runtime_error);
+}
+
+// ####################################################################################################
+// The code below is part of a simple check whether or not a (Windows-only) file path is absolute.
+//
+// NOTE: (and this is admittedly brittle for maintenance, but) the internal methods used are literally
+// copied here for direct driving. There's likely a better way going forward, than the manual copy/paste.
+// But in the absence of a similar runtime environment to build in & test against, while
+// the motivating behavior is blocking other work, this lets me get the fix in their hands ASAP and still
+// have some test code poking it beforehand. -DB
+//
+// Stand-alone copies of the Windows path helpers (see the note above); kept
+// in their own namespace so the test below can drive them directly.
+namespace test_windows {
+
+// Returns 'uri' with a leading "file://" scheme removed; input that does not
+// contain the scheme is returned unchanged.
+// Throws std::runtime_error if "file://" is found anywhere but at the start.
+// Asserts that 'uri' is not empty.
+static string removeFileUriScheme(const string& uri)
+{
+ assert(!uri.empty());
+
+ auto schemeLess = uri;
+ const auto fileScheme = string{"file://"};
+ const auto schemeFound = schemeLess.find(fileScheme);
+ if (schemeFound != string::npos) {
+ if (schemeFound != 0)
+ throw runtime_error("Malformed URI: scheme not at beginning");
+ schemeLess = schemeLess.substr(fileScheme.size());
+ }
+ return schemeLess;
+}
+
+// Returns 'filePath' without its leading disk name, i.e. without the first
+// two characters when they are an alphabetic character followed by ':'.
+// Any other input (including strings shorter than two characters) is returned
+// unchanged.
+static
+string removeDiskName(const string& filePath)
+{
+ if (filePath.size() >= 2) {
+ const char firstChar = filePath.at(0);
+ if ((isalpha(firstChar) != 0) && (filePath.at(1) == ':'))
+ return filePath.substr(2);
+ }
+ return filePath;
+}
+
+// separator inserted between 'from' and the file path by native_resolvedFilePath()
+static const char native_pathSeparator = '\\';
+
+// Returns true if 'filePath' is considered absolute: it starts with a
+// backslash, either directly or right after a disk name ("C:").
+// Paths starting with '.' and everything else are reported as not absolute.
+// Asserts that 'filePath' is not empty.
+static bool native_pathIsAbsolute(const string& filePath)
+{
+ assert(!filePath.empty());
+
+ // if starts with single slash or double slash [cases 1,3]
+ if (boost::algorithm::starts_with(filePath, "\\"))
+ return true;
+
+ // if starts with single or double-dots -> not absolute [case 4 + ".\file.txt"]
+ if (boost::algorithm::starts_with(filePath, "."))
+ return false;
+
+ // if starts with drive name and colon ("C:\foo\bar.txt")
+ // NOTE(review): for a bare disk name ("C:") the remainder is empty, so the
+ // recursive call below trips the assert above when asserts are enabled
+ if (filePath.size() >= 2) {
+ const char firstChar = filePath.at(0);
+ if ((isalpha(firstChar) != 0) && (filePath.at(1) == ':'))
+ return native_pathIsAbsolute(removeDiskName(filePath));
+ }
+
+ // otherwise, likely relative
+ return false;
+}
+
+// Resolves 'filePath' against the directory 'from'.
+// A "file://" scheme is stripped first. If the remainder is empty or absolute
+// it is returned as-is; otherwise its disk name and a leading ".\" are
+// dropped and the result is appended to 'from' with a backslash in between.
+// NOTE(review): removeFileUriScheme() asserts on empty input, so an empty
+// 'filePath' only reaches the empty check below when asserts are disabled.
+static string native_resolvedFilePath(const string& filePath,
+ const string& from)
+{
+ // strip file:// scheme if present
+ auto schemeLess = removeFileUriScheme(filePath);
+
+ // if empty or already absolute path, just return it
+ // upfront empty check simplifies further parsing logic
+ if (schemeLess.empty() || native_pathIsAbsolute(schemeLess))
+ return schemeLess;
+
+ // else make relative from the provided 'from' directory
+ //
+ // first pop disk name, then any leading single-dot '.'
+ //
+ // since we're prepending the 'from' directory, we can remove
+ // any leading './' from our file path. this may just mean that
+ // we pop it off to add it right back (when from == '.'), but this
+ // keeps it consistent with other 'from' parent directories
+ //
+ schemeLess = removeDiskName(schemeLess);
+
+ // only a '.' immediately followed by the separator is stripped; a leading
+ // ".." is left in place because its second character is not the separator
+ const bool thisDirAtStart = (schemeLess.find(".") == 0);
+ if (thisDirAtStart) {
+ if (schemeLess.find(native_pathSeparator) == 1)
+ schemeLess = schemeLess.substr(2);
+ }
+ return from + native_pathSeparator + schemeLess;
+}
+
+} // namespace test_windows
+
+// Drives the test_windows helpers with typical Windows-style paths: UNC,
+// drive-absolute, drive-relative, dot-relative and bare file names.
+TEST(FileUtilsTest, WindowsPathsOk)
+{
+    { // remove disk name
+
+        // "C:\tmp.txt"
+        string f1 = "C:\\tmp.txt";
+        EXPECT_EQ(string("\\tmp.txt"), test_windows::removeDiskName(f1));
+
+        // "C:tmp.txt"
+        string f2 = "C:tmp.txt";
+        EXPECT_EQ(string("tmp.txt"), test_windows::removeDiskName(f2));
+
+        // "\tmp.txt"
+        string f3 = "\\tmp.txt";
+        EXPECT_EQ(f3, test_windows::removeDiskName(f3));
+
+        // "tmp.txt"
+        string f4 = "tmp.txt";
+        EXPECT_EQ(f4, test_windows::removeDiskName(f4));
+    }
+
+    { // isAbsolute ?
+
+        // "\\server\path\to\tmp.txt"
+        // (every backslash is escaped - an unescaped "\t" would embed a TAB)
+        EXPECT_TRUE(test_windows::native_pathIsAbsolute("\\\\server\\path\\to\\tmp.txt"));
+
+        // "..\tmp.txt"
+        EXPECT_FALSE(test_windows::native_pathIsAbsolute("..\\tmp.txt"));
+
+        // ".\tmp.txt"
+        EXPECT_FALSE(test_windows::native_pathIsAbsolute(".\\tmp.txt"));
+
+        // "C:\path\to\tmp.txt"
+        EXPECT_TRUE(test_windows::native_pathIsAbsolute("C:\\path\\to\\tmp.txt"));
+
+        // "C:..\path\to\tmp.txt"
+        EXPECT_FALSE(test_windows::native_pathIsAbsolute("C:..\\path\\to\\tmp.txt"));
+    }
+
+    { // resolve file path
+
+        const string myRootDir = "C:\\path\\to\\myRootDir";
+
+        // "\\server\path\to\tmp.txt"
+        // (every backslash is escaped - an unescaped "\t" would embed a TAB)
+        const string fn1 = "\\\\server\\path\\to\\tmp.txt";
+        const string fn1_expected = fn1;
+        EXPECT_EQ(fn1_expected, test_windows::native_resolvedFilePath(fn1, myRootDir));
+
+        // "..\tmp.txt"
+        const string fn2 = "..\\tmp.txt";
+        const string fn2_expected = "C:\\path\\to\\myRootDir\\..\\tmp.txt";
+        EXPECT_EQ(fn2_expected, test_windows::native_resolvedFilePath(fn2, myRootDir));
+
+        // ".\tmp.txt"
+        const string fn3 = ".\\tmp.txt";
+        const string fn3_expected = "C:\\path\\to\\myRootDir\\tmp.txt";
+        EXPECT_EQ(fn3_expected, test_windows::native_resolvedFilePath(fn3, myRootDir));
+
+        // "C:\path\to\tmp.txt"
+        const string fn4 = "C:\\path\\to\\tmp.txt";
+        const string fn4_expected = fn4;
+        EXPECT_EQ(fn4_expected, test_windows::native_resolvedFilePath(fn4, myRootDir));
+
+        // "C:..\path\to\tmp.txt"
+        const string fn5 = "C:..\\path\\to\\tmp.txt";
+        const string fn5_expected = "C:\\path\\to\\myRootDir\\..\\path\\to\\tmp.txt";
+        EXPECT_EQ(fn5_expected, test_windows::native_resolvedFilePath(fn5, myRootDir));
+
+        // "C:tmp.txt"
+        const string fn6 = "C:tmp.txt";
+        const string fn6_expected = "C:\\path\\to\\myRootDir\\tmp.txt";
+        EXPECT_EQ(fn6_expected, test_windows::native_resolvedFilePath(fn6, myRootDir));
+        EXPECT_EQ(fn3_expected, test_windows::native_resolvedFilePath(fn6, myRootDir)); // our path is equivalent to fn3's "./temp.txt"
+    }
+}
+//
+// ####################################################################################################
+
+
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/Frames.h>
+#include <string>
+#include <vector>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Fixture data shared by the FramesTest cases below.
+namespace tests {
+
+// Frame values handed to the Frames constructor (103 entries).
+static const vector<uint16_t> testFrames =
+{
+ 0, 8, 140, 0, 0, 7, 4, 0, 85, 2,
+ 1, 3, 2, 10, 1, 20, 47, 10, 9, 60,
+ 20, 3, 12, 5, 13, 165, 6, 14, 22, 12,
+ 2, 4, 9, 218, 27, 3, 15, 2, 17, 2,
+ 45, 24, 89, 10, 7, 1, 11, 15, 0, 7,
+ 0, 28, 17, 12, 6, 10, 37, 0, 12, 52,
+ 0, 7, 1, 14, 3, 26, 12, 0, 20, 17,
+ 2, 13, 2, 9, 13, 7, 15, 29, 3, 6,
+ 2, 1, 28, 10, 3, 14, 7, 1, 22, 1,
+ 6, 6, 0, 19, 31, 6, 2, 14, 0, 0,
+ 1000, 947, 948
+};
+
+// Expected result of Frames::Encode() for testFrames, entry by entry.
+// In this fixture, inputs of 60 and below are unchanged, while the larger
+// inputs map to different codes (140 -> 102, 85 -> 75, 165 -> 115,
+// 218 -> 135, 89 -> 77, 1000 -> 255, 947 -> 254, 948 -> 255).
+static const vector<uint8_t> encodedFrames =
+{
+ 0, 8, 102, 0, 0, 7, 4, 0, 75, 2, 1, 3, 2,
+ 10, 1, 20, 47, 10, 9, 60, 20, 3, 12, 5, 13, 115,
+ 6, 14, 22, 12, 2, 4, 9, 135, 27, 3, 15, 2, 17,
+ 2, 45, 24, 77, 10, 7, 1, 11, 15, 0, 7, 0, 28,
+ 17, 12, 6, 10, 37, 0, 12, 52, 0, 7, 1, 14, 3,
+ 26, 12, 0, 20, 17, 2, 13, 2, 9, 13, 7, 15, 29,
+ 3, 6, 2, 1, 28, 10, 3, 14, 7, 1, 22, 1, 6,
+ 6, 0, 19, 31, 6, 2, 14, 0, 0,
+ 255, 254, 255
+};
+
+} // namespace tests
+
+// A default-constructed Frames holds no data; one built from a vector hands
+// the same values back through Data().
+TEST(FramesTest, Constructors)
+{
+    const Frames emptyFrames;
+    ASSERT_TRUE(emptyFrames.Data().empty());
+
+    const Frames populatedFrames(tests::testFrames);
+    const auto storedData = populatedFrames.Data();
+    ASSERT_EQ(tests::testFrames, storedData);
+}
+
+// Encode() on the test frames must reproduce the expected byte table.
+TEST(FramesTest, Encoded)
+{
+    const Frames frames(tests::testFrames);
+    const auto encoded = frames.Encode();
+
+    ASSERT_EQ(tests::encodedFrames, encoded);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.\r
+//\r
+// All rights reserved.\r
+//\r
+// Redistribution and use in source and binary forms, with or without\r
+// modification, are permitted (subject to the limitations in the\r
+// disclaimer below) provided that the following conditions are met:\r
+//\r
+// * Redistributions of source code must retain the above copyright\r
+// notice, this list of conditions and the following disclaimer.\r
+//\r
+// * Redistributions in binary form must reproduce the above\r
+// copyright notice, this list of conditions and the following\r
+// disclaimer in the documentation and/or other materials provided\r
+// with the distribution.\r
+//\r
+// * Neither the name of Pacific Biosciences nor the names of its\r
+// contributors may be used to endorse or promote products derived\r
+// from this software without specific prior written permission.\r
+//\r
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE\r
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC\r
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED\r
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\r
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\r
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS\r
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\r
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF\r
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\r
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\r
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT\r
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
+// SUCH DAMAGE.\r
+\r
+// Author: Derek Barnett\r
+\r
+#ifdef PBBAM_TESTING\r
+#define private public\r
+#endif\r
+\r
+#include "TestData.h"\r
+#include <gtest/gtest.h>\r
+#include <pbbam/GenomicIntervalQuery.h>\r
+#include <iostream>\r
+#include <string>\r
+using namespace PacBio;\r
+using namespace PacBio::BAM;\r
+using namespace std;\r
+\r
+// Path to the aligned BAM fixture (under the test data directory) that the\r
+// GenomicIntervalQuery tests in this file open.\r
+const string inputBamFn = tests::Data_Dir + "/aligned.bam";\r
+\r
+TEST(GenomicIntervalQueryTest, ReuseQueryAndCountRecords)\r
+{\r
+    // One query object is re-pointed at several intervals in turn; after each\r
+    // change it is iterated from scratch and the number of records is checked.\r
+    const string rname = "lambda_NEB3011";\r
+\r
+    BamFile bamFile(inputBamFn);\r
+    GenomicInterval interval(rname, 5000, 6000);\r
+    GenomicIntervalQuery query(interval, bamFile);\r
+\r
+    // Iterates the query once and reports how many records it yielded.\r
+    auto countRecords = [&query]() -> int {\r
+        int numRecords = 0;\r
+        for (const BamRecord& record : query) {\r
+            (void)record;\r
+            ++numRecords;\r
+        }\r
+        return numRecords;\r
+    };\r
+\r
+    // interval supplied at construction\r
+    EXPECT_EQ(2, countRecords());\r
+\r
+    // move the interval and hand it back to the same query\r
+    interval.Start(9300);\r
+    interval.Stop(9400);\r
+    query.Interval(interval);\r
+    EXPECT_EQ(2, countRecords());\r
+\r
+    // a region that contains no records\r
+    interval.Name(rname);\r
+    interval.Start(1000);\r
+    interval.Stop(2000);\r
+    query.Interval(interval);\r
+    EXPECT_EQ(0, countRecords());\r
+\r
+    // unknown reference name: setting the interval throws, but iterating\r
+    // afterwards is still safe and simply yields nothing\r
+    interval.Name("does not exist");\r
+    interval.Start(0);\r
+    interval.Stop(100);\r
+    EXPECT_THROW(query.Interval(interval), std::runtime_error);\r
+    EXPECT_EQ(0, countRecords());\r
+\r
+    // a real region must be readable again after the invalid one\r
+    interval.Name(rname);\r
+    interval.Start(5000);\r
+    interval.Stop(6000);\r
+    query.Interval(interval);\r
+    EXPECT_EQ(2, countRecords());\r
+}\r
+\r
+TEST(GenomicIntervalQueryTest, NonConstBamRecord)\r
+{\r
+    // Iterating the query through a mutable record reference must not throw\r
+    // and must visit the same two records for this region.\r
+    EXPECT_NO_THROW(\r
+    {\r
+        BamFile file(inputBamFn);\r
+        GenomicInterval region("lambda_NEB3011", 8000, 10000);\r
+        GenomicIntervalQuery regionQuery(region, file);\r
+\r
+        int numRecords = 0;\r
+        for (BamRecord& mutableRecord : regionQuery) {\r
+            (void)mutableRecord;\r
+            ++numRecords;\r
+        }\r
+        EXPECT_EQ(2, numRecords);\r
+    });\r
+}\r
+\r
+TEST(GenomicIntervalQueryTest, MissingBaiShouldThrow)\r
+{\r
+    // Building an interval query is expected to throw std::runtime_error\r
+    // whenever any of the input BAM files has no BAI index next to it.\r
+    const string bamWithoutBai = tests::Data_Dir + "/phi29.bam";\r
+    const string bamWithBai = tests::Data_Dir + "/aligned.bam";\r
+    GenomicInterval region("lambda_NEB3011", 0, 100);\r
+\r
+    // single file, missing BAI\r
+    {\r
+        EXPECT_THROW(GenomicIntervalQuery q(region, bamWithoutBai), std::runtime_error);\r
+    }\r
+\r
+    // dataset in which every resource is missing its BAI\r
+    {\r
+        DataSet allMissing;\r
+        allMissing.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.SubreadBamFile", bamWithoutBai));\r
+        allMissing.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.SubreadBamFile", bamWithoutBai));\r
+        EXPECT_THROW(GenomicIntervalQuery q(region, allMissing), std::runtime_error);\r
+    }\r
+\r
+    // dataset mixing one resource without a BAI and one with\r
+    {\r
+        DataSet mixed;\r
+        mixed.ExternalResources().Add(ExternalResource("PacBio.SubreadFile.SubreadBamFile", bamWithoutBai));\r
+        mixed.ExternalResources().Add(ExternalResource("PacBio.AlignmentFile.AlignmentBamFile", bamWithBai));\r
+        EXPECT_THROW(GenomicIntervalQuery q(region, mixed), std::runtime_error);\r
+    }\r
+}\r
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include "pbbam/IndexedFastaReader.h"
+#include "pbbam/BamRecord.h"
+#include "pbbam/BamFile.h"
+#include "pbbam/EntireFileQuery.h"
+#include <gtest/gtest.h>
+#include <iostream>
+#include <sstream>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Lambda reference FASTA opened by every IndexedFastaReader test in this file.
+const string lambdaFasta = tests::Data_Dir + "/lambdaNEB.fa";
+// Aligned BAM whose records are compared against the reference in PrintSingleInsertion.
+const string singleInsertionBam = tests::Data_Dir + "/aligned.bam";
+
+TEST(IndexedFastaReaderTests, PrintSingleInsertion)
+{
+    // Checks the reference text reported for the first four records of the
+    // aligned BAM, in both NATIVE and GENOMIC orientation. For records 3 and 4
+    // the call with two trailing booleans yields the same bases as the call
+    // with one, minus the run of '-' padding at one end.
+    IndexedFastaReader r(lambdaFasta);
+
+    // Open BAM file
+    BamFile bamFile(singleInsertionBam);
+    EntireFileQuery bamQuery(bamFile);
+
+    auto it = bamQuery.begin();
+    auto last = bamQuery.end();
+
+    // Every dereference below is guarded: if the fixture ever holds fewer than
+    // four records the test stops with a clear failure instead of reading
+    // through an exhausted iterator.
+
+    // record 1
+    ASSERT_TRUE(it != last);
+    auto record = *it++;
+    EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
+              r.ReferenceSubsequence(record, Orientation::NATIVE, true));
+    EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
+              r.ReferenceSubsequence(record, Orientation::NATIVE, true, true));
+    EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
+              r.ReferenceSubsequence(record, Orientation::GENOMIC, true));
+    EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
+              r.ReferenceSubsequence(record, Orientation::GENOMIC, true, true));
+
+    // record 2
+    ASSERT_TRUE(it != last);
+    record = *it++;
+    EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
+              r.ReferenceSubsequence(record, Orientation::NATIVE, true));
+    EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
+              r.ReferenceSubsequence(record, Orientation::NATIVE, true, true));
+    EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
+              r.ReferenceSubsequence(record, Orientation::GENOMIC, true));
+    EXPECT_EQ("GGCTGCAGTGTACAGCGGTCAGGAGGCC-ATTGATGCCGGACTGGCTGAT",
+              r.ReferenceSubsequence(record, Orientation::GENOMIC, true, true));
+
+    // record 3
+    ASSERT_TRUE(it != last);
+    record = *it++;
+    EXPECT_EQ("----------------------------------------------------AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA",
+              r.ReferenceSubsequence(record, Orientation::NATIVE, true));
+    EXPECT_EQ("AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA",
+              r.ReferenceSubsequence(record, Orientation::NATIVE, true, true));
+    EXPECT_EQ("----------------------------------------------------AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA",
+              r.ReferenceSubsequence(record, Orientation::GENOMIC, true));
+    EXPECT_EQ("AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA",
+              r.ReferenceSubsequence(record, Orientation::GENOMIC, true, true));
+
+    // record 4: here NATIVE and GENOMIC results differ from each other
+    ASSERT_TRUE(it != last);
+    record = *it++;
+    EXPECT_EQ("AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA----------------------------------------------------",
+              r.ReferenceSubsequence(record, Orientation::GENOMIC, true));
+    EXPECT_EQ("----------------------------------------------------TTGCCGCTGTT-ACCGTGCTGCGATCTTCTGCCATCGACGGACGTCCCACATTGGTGACTT",
+              r.ReferenceSubsequence(record, Orientation::NATIVE, true));
+    EXPECT_EQ("AAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGT-AACAGCGGCAA",
+              r.ReferenceSubsequence(record, Orientation::GENOMIC, true, true));
+    EXPECT_EQ("TTGCCGCTGTT-ACCGTGCTGCGATCTTCTGCCATCGACGGACGTCCCACATTGGTGACTT",
+              r.ReferenceSubsequence(record, Orientation::NATIVE, true, true));
+
+    // Debugging aid: to eyeball an alignment, print
+    // r.ReferenceSubsequence(record, orientation, ...) followed by
+    // record.Sequence(orientation, ...) with the same trailing arguments,
+    // one per line, for each record above.
+}
+
+TEST(IndexedFastaReaderTests, ReadLambda)
+{
+    // Exercises the basic accessors of IndexedFastaReader on the lambda
+    // reference: sequence presence, count, length, and subsequence fetching.
+    IndexedFastaReader r(lambdaFasta);
+
+    EXPECT_TRUE(r.HasSequence("lambda_NEB3011"));
+    EXPECT_FALSE(r.HasSequence("dog"));
+    EXPECT_EQ(1, r.NumSequences());
+    EXPECT_EQ(48502, r.SequenceLength("lambda_NEB3011"));
+
+    // region-string form
+    string seq = r.Subsequence("lambda_NEB3011:0-10");
+    EXPECT_EQ("GGGCGGCGAC", seq);
+
+    // explicit (name, begin, end) form yields the same bases
+    string seq2 = r.Subsequence("lambda_NEB3011", 0, 10);
+    EXPECT_EQ("GGGCGGCGAC", seq2);
+
+    // subsequence extending beyond bounds returns clipped
+    // (48502 - 48400 = 102 bases). The literal is unsigned so the comparison
+    // with string::length() does not mix signed and unsigned operands.
+    string seq3 = r.Subsequence("lambda_NEB3011", 48400, 48600);
+    EXPECT_EQ(102u, seq3.length());
+
+    // TODO: add coverage for a malformed ("bad") subsequence request
+}
+
+TEST(IndexedFastaReaderTests, Errors)
+{
+    // Lookups that name a sequence absent from the FASTA must throw.
+    IndexedFastaReader reader(lambdaFasta);
+
+    // Disabled: checks for access before the reader has been "opened".
+    //
+    // EXPECT_THROW(reader.NumSequences(), std::exception);
+    // EXPECT_THROW(reader.HasSequence("lambda_NEB3011"), std::exception);
+    // EXPECT_THROW(reader.SequenceLength("lambda_NEB3011"), std::exception);
+    // EXPECT_THROW(reader.Subsequence("lambda_NEB3011:0-10"), std::exception);
+
+    // unknown sequence name, queried on an opened reader
+    EXPECT_THROW(reader.SequenceLength("dog"), std::exception);
+    EXPECT_THROW(reader.Subsequence("dog:0-10"), std::exception);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/GenomicInterval.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+TEST(IntervalTest, Constructors)
+{
+    // default ctor: both endpoints are zero
+    Interval<Position> defaulted;
+    EXPECT_EQ(0, defaulted.Start());
+    EXPECT_EQ(0, defaulted.Stop());
+
+    // single-position ctor: stop is one past the given start
+    Interval<Position> onePosition(4);
+    EXPECT_EQ(4, onePosition.Start());
+    EXPECT_EQ(5, onePosition.Stop());
+
+    // explicit (start, stop) ctor: endpoints stored as given
+    Interval<Position> explicitRange(5, 8);
+    EXPECT_EQ(5, explicitRange.Start());
+    EXPECT_EQ(8, explicitRange.Stop());
+
+    // TODO: check out-of-order intervals, etc
+}
+
+TEST(IntervalTest, EqualityTest)
+{
+    // pairs that should compare equal to each other
+    Interval<Position> emptyA;
+    Interval<Position> emptyB;
+    Interval<Position> single(4);
+    Interval<Position> singleSpelledOut(4,5);
+    Interval<Position> range(5, 8);
+    Interval<Position> rangeTwin(5, 8);
+
+    // unrelated to all of the above
+    Interval<Position> farAway(20, 40);
+
+    // every interval equals itself
+    EXPECT_TRUE(emptyA == emptyA);
+    EXPECT_TRUE(single == single);
+    EXPECT_TRUE(range == range);
+    EXPECT_TRUE(farAway == farAway);
+
+    // distinct objects holding the same endpoints are equal
+    EXPECT_TRUE(emptyA == emptyB);
+    EXPECT_TRUE(single == singleSpelledOut);
+    EXPECT_TRUE(range == rangeTwin);
+
+    // differing endpoints are not equal
+    EXPECT_FALSE(emptyA == single);
+    EXPECT_FALSE(emptyA == range);
+    EXPECT_FALSE(emptyA == farAway);
+    EXPECT_FALSE(single == range);
+    EXPECT_FALSE(range == farAway);
+}
+
+TEST(IntervalTest, Copy)
+{
+    // Both copy syntaxes must produce an interval equal to the source.
+    Interval<Position> source(5, 8);
+    Interval<Position> directCopy(source);
+    Interval<Position> assignedCopy = source;
+
+    EXPECT_TRUE(source == source);
+    EXPECT_TRUE(source == directCopy);
+    EXPECT_TRUE(source == assignedCopy);
+}
+
+TEST(IntervalTest, Modifier)
+{
+    // Changing the endpoints of a copy must not leave it equal to the source.
+    Interval<Position> untouched(5, 8);
+    Interval<Position> edited(untouched);
+
+    edited.Start(2);
+    edited.Stop(10);
+
+    EXPECT_FALSE(untouched == edited);
+    EXPECT_EQ(2, edited.Start());
+    EXPECT_EQ(10, edited.Stop());
+}
+
+TEST(IntervalTest, CoverTest)
+{
+    // Fixtures are named after their [start, stop) endpoints.
+    Interval<Position> iv2to4(2, 4);
+    Interval<Position> iv3to5(3, 5);
+    Interval<Position> iv6to8(6, 8);
+    Interval<Position> iv1to7(1, 7);
+    Interval<Position> iv5to8(5, 8);
+
+    // an interval covers, and is covered by, itself
+    EXPECT_TRUE(iv2to4.Covers(iv2to4));
+    EXPECT_TRUE(iv2to4.CoveredBy(iv2to4));
+
+    // strict containment: [3,5) lies inside [1,7), but not the other way round
+    EXPECT_TRUE(iv3to5.CoveredBy(iv1to7));
+    EXPECT_TRUE(iv1to7.Covers(iv3to5));
+    EXPECT_FALSE(iv3to5 == iv1to7);
+    EXPECT_FALSE(iv3to5.Covers(iv1to7));
+
+    // completely disjoint intervals never cover each other
+    EXPECT_FALSE(iv3to5.Covers(iv6to8));
+    EXPECT_FALSE(iv6to8.Covers(iv3to5));
+    EXPECT_FALSE(iv3to5.CoveredBy(iv6to8));
+    EXPECT_FALSE(iv6to8.CoveredBy(iv3to5));
+
+    // touching intervals (a.stop == b.start) do not cover each other either
+    EXPECT_FALSE(iv3to5.Covers(iv5to8));
+    EXPECT_FALSE(iv3to5.CoveredBy(iv5to8));
+
+    // shared stop with the other start contained: [5,8) covers [6,8)
+    EXPECT_TRUE(iv5to8.Covers(iv6to8));
+    EXPECT_TRUE(iv6to8.CoveredBy(iv5to8));
+}
+
+TEST(IntervalTest, IntersectTest)
+{
+    // Fixtures are named after their [start, stop) endpoints.
+    Interval<Position> iv2to4(2, 4);
+    Interval<Position> iv3to5(3, 5);
+    Interval<Position> iv6to8(6, 8);
+    Interval<Position> iv1to7(1, 7);
+    Interval<Position> iv5to8(5, 8);
+
+    // an interval intersects itself
+    EXPECT_TRUE(iv2to4.Intersects(iv2to4));
+
+    // overlap is symmetric
+    EXPECT_TRUE(iv2to4.Intersects(iv3to5));
+    EXPECT_TRUE(iv3to5.Intersects(iv2to4));
+
+    // covering implies intersection, in both directions
+    EXPECT_TRUE(iv1to7.Covers(iv2to4));
+    EXPECT_TRUE(iv2to4.Intersects(iv1to7));
+    EXPECT_TRUE(iv1to7.Intersects(iv2to4));
+
+    // b.start > a.stop: plainly disjoint
+    EXPECT_FALSE(iv3to5.Intersects(iv6to8));
+    // b.start == a.stop: intervals are right-open, so still disjoint
+    EXPECT_FALSE(iv3to5.Intersects(iv5to8));
+}
+
+TEST(IntervalTest, ValidityTest)
+{
+    // In the cases below, exactly those with start strictly below stop are valid.
+    Interval<Position> defaulted;
+
+    EXPECT_FALSE(defaulted.IsValid());                  // default ctor
+    EXPECT_FALSE(Interval<Position>(0,0).IsValid());    // start == stop (zero)
+    EXPECT_FALSE(Interval<Position>(4,4).IsValid());    // start == stop (nonzero)
+    EXPECT_TRUE(Interval<Position>(0,1).IsValid());     // start < stop (start is zero)
+    EXPECT_TRUE(Interval<Position>(4,5).IsValid());     // start < stop (start is nonzero)
+    EXPECT_FALSE(Interval<Position>(5,4).IsValid());    // start > stop
+}
+
+TEST(IntervalTest, LengthTest)
+{
+    // Each expected length below equals stop minus start.
+    Interval<Position> iv2to4(2, 4);
+    EXPECT_EQ(2, iv2to4.Length());
+
+    Interval<Position> iv3to5(3, 5);
+    EXPECT_EQ(2, iv3to5.Length());
+
+    Interval<Position> iv6to8(6, 8);
+    EXPECT_EQ(2, iv6to8.Length());
+
+    Interval<Position> iv1to7(1, 7);
+    EXPECT_EQ(6, iv1to7.Length());
+
+    Interval<Position> iv5to8(5, 8);
+    EXPECT_EQ(3, iv5to8.Length());
+
+    // TODO: check out-of-order intervals, etc
+}
+
+TEST(GenomicIntervalTest, DefaultConstructor)
+{
+    // A default-constructed genomic interval has no name and zero endpoints.
+    GenomicInterval defaulted;
+
+    EXPECT_EQ("", defaulted.Name());
+    EXPECT_EQ(0, defaulted.Start());
+    EXPECT_EQ(0, defaulted.Stop());
+}
+
+TEST(GenomicIntervalTest, ExplicitConstructor)
+{
+    // Name and endpoints passed to the ctor are reported back unchanged.
+    GenomicInterval region("foo", 100, 200);
+
+    EXPECT_EQ("foo", region.Name());
+    EXPECT_EQ(100, region.Start());
+    EXPECT_EQ(200, region.Stop());
+}
+
+TEST(GenomicIntervalTest, RegionStringConstructor)
+{
+    // "name:start-stop" is split into its three parts
+    GenomicInterval fullRegion("foo:100-200");
+    EXPECT_EQ("foo", fullRegion.Name());
+    EXPECT_EQ(100, fullRegion.Start());
+    EXPECT_EQ(200, fullRegion.Stop());
+
+    // a bare reference name spans from zero to htslib's default,
+    // "read-to-end" interval stop
+    GenomicInterval nameOnly("foo");
+    EXPECT_EQ("foo", nameOnly.Name());
+    EXPECT_EQ(0, nameOnly.Start());
+    EXPECT_EQ(1<<29, nameOnly.Stop());
+}
+
+TEST(GenomicIntervalTest, Copy)
+{
+    // Both copy syntaxes must produce a genomic interval equal to the source.
+    GenomicInterval source("foo", 10, 20);
+    GenomicInterval directCopy(source);
+    GenomicInterval assignedCopy = source;
+
+    EXPECT_TRUE(source == source);
+    EXPECT_TRUE(source == directCopy);
+    EXPECT_TRUE(source == assignedCopy);
+}
+
+TEST(GenomicIntervalTest, Modifiers)
+{
+    GenomicInterval original("foo", 10, 20);
+
+    // copy #1: change name and both endpoints one property at a time
+    GenomicInterval editedByField(original);
+    editedByField.Name("bar");
+    editedByField.Start(2);
+    editedByField.Stop(10);
+
+    // copy #2: replace only the positional interval, in one call
+    GenomicInterval editedAsWhole(original);
+    editedAsWhole.Interval(editedByField.Interval());
+
+    // field-wise edits took effect and made the copy differ from the original
+    EXPECT_FALSE(original == editedByField);
+    EXPECT_EQ("bar", editedByField.Name());
+    EXPECT_EQ(2, editedByField.Start());
+    EXPECT_EQ(10, editedByField.Stop());
+
+    // whole-interval edit kept the original name but took the new endpoints
+    EXPECT_EQ(original.Name(), editedAsWhole.Name());
+    EXPECT_EQ(editedByField.Interval(), editedAsWhole.Interval());
+}
+
+TEST(GenomicIntervalTest, CoverTest)
+{
+    // Fixtures on reference "foo", named after their [start, stop) endpoints.
+    GenomicInterval foo2to4("foo", 2, 4);
+    GenomicInterval foo3to5("foo", 3, 5);
+    GenomicInterval foo6to8("foo", 6, 8);
+    GenomicInterval foo1to7("foo", 1, 7);
+    GenomicInterval foo5to8("foo", 5, 8);
+
+    // same endpoints as foo3to5, but on a different reference
+    GenomicInterval bar3to5(foo3to5);
+    bar3to5.Name("bar");
+
+    // an interval covers, and is covered by, itself
+    EXPECT_TRUE(foo2to4.Covers(foo2to4));
+    EXPECT_TRUE(foo2to4.CoveredBy(foo2to4));
+
+    // strict containment on the same reference
+    EXPECT_TRUE(foo3to5.CoveredBy(foo1to7));
+    EXPECT_TRUE(foo1to7.Covers(foo3to5));
+    EXPECT_FALSE(foo3to5 == foo1to7);
+    EXPECT_FALSE(foo3to5.Covers(foo1to7));
+
+    // identical endpoints on another reference: no cover relation at all
+    EXPECT_FALSE(bar3to5.CoveredBy(foo1to7));
+    EXPECT_FALSE(foo1to7.Covers(bar3to5));
+    EXPECT_FALSE(bar3to5 == foo1to7);
+    EXPECT_FALSE(bar3to5.Covers(foo1to7));
+
+    // completely disjoint intervals never cover each other
+    EXPECT_FALSE(foo3to5.Covers(foo6to8));
+    EXPECT_FALSE(foo6to8.Covers(foo3to5));
+    EXPECT_FALSE(foo3to5.CoveredBy(foo6to8));
+    EXPECT_FALSE(foo6to8.CoveredBy(foo3to5));
+
+    // touching intervals (a.stop == b.start) do not cover each other either
+    EXPECT_FALSE(foo3to5.Covers(foo5to8));
+    EXPECT_FALSE(foo3to5.CoveredBy(foo5to8));
+
+    // shared stop with the other start contained: [5,8) covers [6,8)
+    EXPECT_TRUE(foo5to8.Covers(foo6to8));
+    EXPECT_TRUE(foo6to8.CoveredBy(foo5to8));
+}
+
+TEST(GenomicIntervalTest, ValidityTest)
+{
+    // In the cases below, only those with a non-empty name AND start strictly
+    // below stop are valid.
+    GenomicInterval defaulted;
+    EXPECT_FALSE(defaulted.IsValid());                      // default ctor
+
+    // non-empty reference name
+    EXPECT_FALSE(GenomicInterval("foo",0,0).IsValid());     // start == stop (zero)
+    EXPECT_FALSE(GenomicInterval("foo",4,4).IsValid());     // start == stop (nonzero)
+    EXPECT_TRUE(GenomicInterval("foo",0,1).IsValid());      // start < stop (start is zero)
+    EXPECT_TRUE(GenomicInterval("foo",4,5).IsValid());      // start < stop (start is nonzero)
+    EXPECT_FALSE(GenomicInterval("foo",5,4).IsValid());     // start > stop
+
+    // empty reference name: never valid, whatever the endpoints
+    EXPECT_FALSE(GenomicInterval("",0,0).IsValid());        // start == stop (zero)
+    EXPECT_FALSE(GenomicInterval("",4,4).IsValid());        // start == stop (nonzero)
+    EXPECT_FALSE(GenomicInterval("",0,1).IsValid());        // start < stop (start is zero)
+    EXPECT_FALSE(GenomicInterval("",4,5).IsValid());        // start < stop (start is nonzero)
+    EXPECT_FALSE(GenomicInterval("",5,4).IsValid());        // start > stop
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/BamFile.h>
+#include <pbbam/BamReader.h>
+#include <pbbam/BamWriter.h>
+#include <pbbam/EntireFileQuery.h>
+#include <pbbam/PbiBuilder.h>
+#include <pbbam/PbiIndex.h>
+#include <pbbam/PbiLookupData.h>
+#include <pbbam/PbiRawData.h>
+#include <string>
+#include <cstdio>
+#include <cstdlib>
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Aligned BAM fixture; CreateFromExistingBam copies it before building a new PBI index.
+const string test2BamFn = tests::Data_Dir + "/aligned2.bam";
+// phi29 BAM fixture from the test data directory.
+const string phi29BamFn = tests::Data_Dir + "/phi29.bam";
+
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
+// Builds the expected raw PBI contents for the aligned2.bam fixture: header
+// fields plus the basic, mapped and reference sections for its 10 reads.
+static
+PbiRawData Test2Bam_CoreIndexData(void)
+{
+    PbiRawData core;
+
+    // header
+    core.Version(PbiFile::Version_3_0_1);
+    core.FileSections(PbiFile::BASIC | PbiFile::MAPPED | PbiFile::REFERENCE);
+    core.NumReads(10);
+
+    // per-read basic section
+    auto& basic = core.BasicData();
+    basic.rgId_       = { -1197849594, -1197849594, -1197849594, -1197849594, -1197849594, -1197849594, -1197849594, -1197849594, -1197849594, -1197849594 };
+    basic.qStart_     = {48,387,0,9936,10232,7468,5557,7285,426,7064};
+    basic.qEnd_       = {1132,1134,344,10187,10394,8906,7235,8657,1045,7421};
+    basic.holeNumber_ = {49050,32328,32328,6469,6469,30983,13473,13473,19915,30983};
+    basic.readQual_   = {0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6};
+    basic.ctxtFlag_   = {0,0,0,0,0,0,0,0,0,0};
+    basic.fileOffset_ = { 33816576, 33825163, 33831333, 33834264, 33836542, 33838065, 33849818, 33863499, 33874621, 1392836608 };
+
+    // per-read mapped section
+    auto& mapped = core.MappedData();
+    mapped.tId_       = {0,0,0,0,0,0,0,0,0,0};
+    mapped.tStart_    = {0,302,675,2170,2203,3572,4506,4507,4592,4669};
+    mapped.tEnd_      = {471,1019,1026,2397,2326,5015,6125,5850,5203,5011};
+    mapped.aStart_    = {653,395,1,9960,10271,7468,5574,7285,441,7075};
+    mapped.aEnd_      = {1129,1134,344,10185,10394,8906,7235,8647,1040,7418};
+    mapped.revStrand_ = {0,1,0,1,0,1,1,0,1,0};
+    mapped.nM_        = {460,704,339,216,118,1394,1581,1313,583,333};
+    mapped.nMM_       = {0,0,0,0,0,0,0,0,0,0};
+    mapped.mapQV_     = {254,254,254,254,254,254,254,254,254,254};
+
+    // reference section: one real entry followed by an all-0xFFFFFFFF entry
+    auto& reference = core.ReferenceData();
+    reference.entries_ = {
+        PbiReferenceEntry{0,0,10},
+        PbiReferenceEntry{4294967295,4294967295,4294967295}
+    };
+
+    return core;
+}
+
+// NOTE: We have 2 different sets of offsets because the copied, new file differs in size from the existing one.
+//
+// Unsure which combination of write parameters was used on the original. Things like thread count,
+// compression level, etc. can affect compression ratio, BGZF block sizes, etc. even though the BAM record
+// content itself is equal. So we'll just track these index values separately, for now at least.
+//
+// Expected index for the fixture as it exists on disk: the core data with the
+// file offsets of the original BAM.
+static
+PbiRawData Test2Bam_ExistingIndex(void)
+{
+    PbiRawData existing = Test2Bam_CoreIndexData();
+    existing.BasicData().fileOffset_ = { 33816576, 33825163, 33831333, 33834264, 33836542, 33838065, 33849818, 33863499, 33874621, 1392836608 };
+    return existing;
+}
+
+// Expected index for a freshly written copy of the fixture: the core data with
+// the file offsets of the new BAM.
+static
+PbiRawData Test2Bam_NewIndex(void)
+{
+    PbiRawData rewritten = Test2Bam_CoreIndexData();
+    rewritten.BasicData().fileOffset_ = { 33816576, 236126208, 391315456, 469106688, 537067520, 587792384, 867303424, 1182793728, 1449787392, 1582628864 };
+    return rewritten;
+}
+
+// Emits one non-fatal gtest expectation per header field and per data column,
+// so a mismatch pinpoints which part of the two raw indices differs. Optional
+// sections are compared only when both indices report having them.
+static
+void ExpectRawIndicesEqual(const PbiRawData& expected, const PbiRawData& actual)
+{
+    // header data
+    EXPECT_EQ(expected.Version(),      actual.Version());
+    EXPECT_EQ(expected.FileSections(), actual.FileSections());
+    EXPECT_EQ(expected.NumReads(),     actual.NumReads());
+
+    // subread (basic) data
+    const auto& expBasic = expected.BasicData();
+    const auto& actBasic = actual.BasicData();
+    EXPECT_EQ(expBasic.rgId_,       actBasic.rgId_);
+    EXPECT_EQ(expBasic.qStart_,     actBasic.qStart_);
+    EXPECT_EQ(expBasic.qEnd_,       actBasic.qEnd_);
+    EXPECT_EQ(expBasic.holeNumber_, actBasic.holeNumber_);
+    EXPECT_EQ(expBasic.readQual_,   actBasic.readQual_);
+    EXPECT_EQ(expBasic.ctxtFlag_,   actBasic.ctxtFlag_);
+    EXPECT_EQ(expBasic.fileOffset_, actBasic.fileOffset_);
+
+    // mapped data
+    EXPECT_EQ(expected.HasMappedData(), actual.HasMappedData());
+    if (expected.HasMappedData() && actual.HasMappedData()) {
+        const auto& expMapped = expected.MappedData();
+        const auto& actMapped = actual.MappedData();
+        EXPECT_EQ(expMapped.tId_,       actMapped.tId_);
+        EXPECT_EQ(expMapped.tStart_,    actMapped.tStart_);
+        EXPECT_EQ(expMapped.tEnd_,      actMapped.tEnd_);
+        EXPECT_EQ(expMapped.aStart_,    actMapped.aStart_);
+        EXPECT_EQ(expMapped.aEnd_,      actMapped.aEnd_);
+        EXPECT_EQ(expMapped.revStrand_, actMapped.revStrand_);
+        EXPECT_EQ(expMapped.nM_,        actMapped.nM_);
+        EXPECT_EQ(expMapped.nMM_,       actMapped.nMM_);
+        EXPECT_EQ(expMapped.mapQV_,     actMapped.mapQV_);
+    }
+
+    // reference data
+    EXPECT_EQ(expected.HasReferenceData(), actual.HasReferenceData());
+    if (expected.HasReferenceData() && actual.HasReferenceData()) {
+        const auto& expReference = expected.ReferenceData();
+        const auto& actReference = actual.ReferenceData();
+        EXPECT_EQ(expReference.entries_, actReference.entries_);
+    }
+
+    // barcode data
+    EXPECT_EQ(expected.HasBarcodeData(), actual.HasBarcodeData());
+    if (expected.HasBarcodeData() && actual.HasBarcodeData()) {
+        const auto& expBarcode = expected.BarcodeData();
+        const auto& actBarcode = actual.BarcodeData();
+        EXPECT_EQ(expBarcode.bcForward_, actBarcode.bcForward_);
+        EXPECT_EQ(expBarcode.bcReverse_, actBarcode.bcReverse_);
+        EXPECT_EQ(expBarcode.bcQual_,    actBarcode.bcQual_);
+    }
+}
+
+// Returns true when every column of the two basic lookup structures compares
+// equal; columns are checked in declaration order and the first mismatch wins.
+static
+bool BasicLookupsEqual(const BasicLookupData& lhs,
+                       const BasicLookupData& rhs)
+{
+    if (!(lhs.rgId_ == rhs.rgId_))             return false;
+    if (!(lhs.qStart_ == rhs.qStart_))         return false;
+    if (!(lhs.qEnd_ == rhs.qEnd_))             return false;
+    if (!(lhs.holeNumber_ == rhs.holeNumber_)) return false;
+    if (!(lhs.readQual_ == rhs.readQual_))     return false;
+    if (!(lhs.ctxtFlag_ == rhs.ctxtFlag_))     return false;
+    if (!(lhs.fileOffset_ == rhs.fileOffset_)) return false;
+    return true;
+}
+
+// Returns true when every column of the two mapped lookup structures compares
+// equal, including the separate forward- and reverse-strand members.
+static
+bool MappedLookupsEqual(const MappedLookupData& lhs,
+                        const MappedLookupData& rhs)
+{
+    if (!(lhs.tId_ == rhs.tId_))                     return false;
+    if (!(lhs.tStart_ == rhs.tStart_))               return false;
+    if (!(lhs.tEnd_ == rhs.tEnd_))                   return false;
+    if (!(lhs.aStart_ == rhs.aStart_))               return false;
+    if (!(lhs.aEnd_ == rhs.aEnd_))                   return false;
+    if (!(lhs.nM_ == rhs.nM_))                       return false;
+    if (!(lhs.nMM_ == rhs.nMM_))                     return false;
+    if (!(lhs.mapQV_ == rhs.mapQV_))                 return false;
+    if (!(lhs.forwardStrand_ == rhs.forwardStrand_)) return false;
+    if (!(lhs.reverseStrand_ == rhs.reverseStrand_)) return false;
+    return true;
+}
+
+// Returns true when both reference lookup structures hold equal references_.
+static
+bool ReferenceLookupsEqual(const ReferenceLookupData& lhs,
+                           const ReferenceLookupData& rhs)
+{
+    const bool sameReferences = (lhs.references_ == rhs.references_);
+    return sameReferences;
+}
+
+// Compares two BarcodeLookupData objects member by member.
+// Returns true only when all three barcode lookup fields are equal.
+static
+bool BarcodeLookupsEqual(const BarcodeLookupData& lhs,
+                         const BarcodeLookupData& rhs)
+{
+    // bail out on the first mismatching field
+    if (!(lhs.bcForward_ == rhs.bcForward_)) return false;
+    if (!(lhs.bcReverse_ == rhs.bcReverse_)) return false;
+    return lhs.bcQual_ == rhs.bcQual_;
+}
+
+// Deep comparison of two PbiIndex objects through their d_ implementation
+// pointers: first the metadata (version, sections, read count), then each
+// lookup component. Returns true only if everything matches.
+static
+bool PbiIndicesEqual(const PbiIndex& lhs, const PbiIndex& rhs)
+{
+    using namespace ::PacBio::BAM;
+    const unique_ptr<internal::PbiIndexPrivate>& a = lhs.d_;
+    const unique_ptr<internal::PbiIndexPrivate>& b = rhs.d_;
+
+    // identical pointers (including both null) need no further inspection
+    if (a == b)
+        return true;
+    // only one side has an implementation
+    if (a == nullptr || b == nullptr)
+        return false;
+
+    // metadata compare
+    const bool metadataDiffers = (a->version_  != b->version_  ||
+                                  a->sections_ != b->sections_ ||
+                                  a->numReads_ != b->numReads_);
+    if (metadataDiffers)
+        return false;
+
+    // component compare - all four sections must agree
+    return BasicLookupsEqual(a->basicData_, b->basicData_) &&
+           MappedLookupsEqual(a->mappedData_, b->mappedData_) &&
+           ReferenceLookupsEqual(a->referenceData_, b->referenceData_) &&
+           BarcodeLookupsEqual(a->barcodeData_, b->barcodeData_);
+}
+
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
+// Copies the test BAM into the generated-data directory, builds a PBI for the
+// copy with PbiFile::CreateFrom, and compares the result to the expected index.
+TEST(PacBioIndexTest, CreateFromExistingBam)
+{
+    // do this in temp directory, so we can ensure write access
+    const string tempDir   = tests::GeneratedData_Dir + "/";
+    const string tempBamFn = tempDir + "aligned_copy.bam";
+    const string tempPbiFn = tempBamFn + ".pbi";
+
+    // everything below depends on the copied BAM, so stop right here
+    // (with a clear message) if the copy itself failed
+    const string cmd = string("cp ") + test2BamFn + " " + tempBamFn;
+    const int cmdResult = system(cmd.c_str());
+    ASSERT_EQ(0, cmdResult) << "command failed: " << cmd;
+
+    BamFile bamFile(tempBamFn);
+    PbiFile::CreateFrom(bamFile);
+    EXPECT_EQ(tempPbiFn, bamFile.PacBioIndexFilename());
+
+    PbiRawData index(bamFile.PacBioIndexFilename());
+    EXPECT_EQ(PbiFile::Version_3_0_1, index.Version());
+    EXPECT_EQ(10, index.NumReads());
+    EXPECT_TRUE(index.HasMappedData());
+
+    const PbiRawData& expectedIndex = tests::Test2Bam_ExistingIndex();
+    tests::ExpectRawIndicesEqual(expectedIndex, index);
+
+    // clean up temp file(s)
+    remove(tempBamFn.c_str());
+    remove(tempPbiFn.c_str());
+}
+
+// Tries to fetch the next record from 'reader' into 'record'.
+// Yields an assertion success/failure tagged with the caller-supplied index 'i',
+// so a failing EXPECT reports which iteration went wrong.
+::testing::AssertionResult CanRead(BamReader& reader, BamRecord& record, int i)
+{
+    ::testing::AssertionResult outcome = reader.GetNext(record)
+                                       ? ::testing::AssertionSuccess()
+                                       : ::testing::AssertionFailure();
+    outcome << "i: " << i;
+    return outcome;
+}
+
+// Re-writes the test BAM to a new file while feeding every record (and the
+// virtual offset reported by the writer) to a PbiBuilder. Then verifies that
+// the recorded offsets are seekable and that the built PBI matches expectations.
+TEST(PacBioIndexTest, CreateOnTheFly)
+{
+    // do this in temp directory, so we can ensure write access
+    const string tempDir   = tests::GeneratedData_Dir + "/";
+    const string tempBamFn = tempDir + "temp.bam";
+    const string tempPbiFn = tempBamFn + ".pbi";
+
+    // NOTE: the new file differs in size from the existing one (different write
+    //       parameters may yield different file sizes, even though content is the same)
+    const vector<int64_t> expectedNewOffsets = { 33816576, 236126208, 391315456, 469106688, 537067520, 587792384, 867303424, 1182793728, 1449787392, 1582628864 };
+    vector<int64_t> observedOffsets;
+
+    // create PBI on the fly from input BAM while we write to new file
+    // (scoped so that writer & builder are destroyed before we read the files back)
+    {
+        BamFile bamFile(test2BamFn);
+        BamHeader header = bamFile.Header();
+
+        BamWriter writer(tempBamFn, header); // default compression, default thread count
+        PbiBuilder builder(tempPbiFn, header.Sequences().size());
+
+        int64_t vOffset = 0;
+        EntireFileQuery entireFile(bamFile);
+        for (const BamRecord& record : entireFile) {
+            writer.Write(record, &vOffset);
+            builder.AddRecord(record, vOffset);
+            observedOffsets.push_back(vOffset);
+        }
+    }
+
+    EXPECT_EQ(expectedNewOffsets, observedOffsets);
+
+    // sanity check on original file
+    {
+        const vector<int64_t> originalFileOffsets = { 33816576, 33825163, 33831333, 33834264, 33836542, 33838065, 33849818, 33863499, 33874621, 1392836608 };
+        BamRecord r;
+        BamReader reader(test2BamFn);
+        // size_t index: avoids comparing a signed int against vector::size()
+        for (size_t i = 0; i < originalFileOffsets.size(); ++i) {
+            reader.VirtualSeek(originalFileOffsets.at(i));
+            EXPECT_TRUE(CanRead(reader, r, static_cast<int>(i)));
+        }
+    }
+
+    // attempt to seek in our new file using both expected & observed offsets
+    {
+        BamRecord r;
+        BamReader reader(tempBamFn);
+        for (size_t i = 0; i < expectedNewOffsets.size(); ++i) {
+            reader.VirtualSeek(expectedNewOffsets.at(i));
+            EXPECT_TRUE(CanRead(reader, r, static_cast<int>(i)));
+        }
+        for (size_t i = 0; i < observedOffsets.size(); ++i) {
+            reader.VirtualSeek(observedOffsets.at(i));
+            EXPECT_TRUE(CanRead(reader, r, static_cast<int>(i)));
+        }
+    }
+
+    // compare data in new PBI file, to expected data
+    const PbiRawData& expectedIndex = tests::Test2Bam_NewIndex();
+    const PbiRawData& fromBuilt = PbiRawData(tempPbiFn);
+    tests::ExpectRawIndicesEqual(expectedIndex, fromBuilt);
+
+    // straight diff of newly-generated PBI file to existing PBI
+    // TODO: Come back to this once pbindexdump is in place.
+    //       We can't exactly do this since file offsets may differ between 2 BAMs of differing compression levels.
+    //       Should add some sort of BAM checksum based on contents, not just size, for this reason.
+//    const string pbiDiffCmd = string("diff -q ") + test2BamFn + ".pbi " + tempPbiFn;
+//    EXPECT_EQ(0, system(pbiDiffCmd.c_str()));
+
+    // clean up temp file(s)
+    remove(tempBamFn.c_str());
+    remove(tempPbiFn.c_str());
+}
+
+// Loads the PBI that accompanies the test BAM as raw data and compares it
+// against the expected existing-index fixture.
+TEST(PacBioIndexTest, RawLoadFromPbiFile)
+{
+    const BamFile bamFile(test2BamFn);
+    const PbiRawData loadedIndex(bamFile.PacBioIndexFilename());
+
+    const PbiRawData& expected = tests::Test2Bam_ExistingIndex();
+    tests::ExpectRawIndicesEqual(expected, loadedIndex);
+}
+
+// Builds a PBI for a copy of the phi29 BAM and checks that the index carries
+// barcode data but no mapped data, with the expected per-read barcode values.
+TEST(PacBioIndexTest, BasicAndBarodeSectionsOnly)
+{
+    // do this in temp directory, so we can ensure write access
+    const string tempDir   = tests::GeneratedData_Dir + "/";
+    const string tempBamFn = tempDir + "phi29.bam";
+    const string tempPbiFn = tempBamFn + ".pbi";
+
+    // copy into the directory (keeps the source file name); everything below
+    // depends on the copy, so stop with a clear message if it failed
+    const string cmd = string("cp ") + phi29BamFn + " " + tempDir;
+    const int cmdResult = system(cmd.c_str());
+    ASSERT_EQ(0, cmdResult) << "command failed: " << cmd;
+
+    BamFile bamFile(tempBamFn);
+    PbiFile::CreateFrom(bamFile);
+    EXPECT_EQ(tempPbiFn, bamFile.PacBioIndexFilename());
+
+    PbiRawData index(bamFile.PacBioIndexFilename());
+    EXPECT_EQ(PbiFile::Version_3_0_1, index.Version());
+    EXPECT_EQ(120, index.NumReads());
+    EXPECT_FALSE(index.HasMappedData());
+    EXPECT_TRUE(index.HasBarcodeData());
+
+    const vector<int16_t> expectedBcForward = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+        2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2};
+    const vector<int16_t> expectedBcReverse = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+        2,2,2,2,2,2,2,2,2,2,2,2,2,2};
+    const vector<int8_t> expectedBcQuality = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1};
+
+    const PbiRawBarcodeData& barcodeData = index.BarcodeData();
+    EXPECT_EQ(expectedBcForward, barcodeData.bcForward_);
+    EXPECT_EQ(expectedBcReverse, barcodeData.bcReverse_);
+    EXPECT_EQ(expectedBcQuality, barcodeData.bcQual_);
+
+    // clean up temp file(s)
+    remove(tempBamFn.c_str());
+    remove(tempPbiFn.c_str());
+}
+
+
+// Loads the raw PBI for the test BAM and checks its reference-data flag.
+// NOTE(review): the test name says "not loaded", yet the assertion expects
+//               reference data to be present - confirm which one is intended.
+TEST(PacBioIndexTest, ReferenceDataNotLoadedOnUnsortedBam)
+{
+    const BamFile bamFile(test2BamFn);
+    const PbiRawData rawIndex(bamFile.PacBioIndexFilename());
+    EXPECT_TRUE(rawIndex.HasReferenceData());
+}
+
+// Constructing a PbiIndex from the test BAM's PBI must not throw, and the
+// loaded index must report the expected read count and virtual file offsets.
+TEST(PacBioIndexTest, LookupLoadFromFileOk)
+{
+    const vector<int64_t> expectedOffsets = { 33816576, 33825163, 33831333, 33834264, 33836542, 33838065, 33849818, 33863499, 33874621, 1392836608 };
+
+    BamFile bamFile(test2BamFn);
+    EXPECT_NO_THROW(
+    {
+        PbiIndex index(bamFile.PacBioIndexFilename());
+        EXPECT_EQ(10, index.NumReads());
+        EXPECT_EQ(expectedOffsets, index.BasicData().VirtualFileOffsets());
+    });
+}
+
+// Both index flavors are expected to throw when the PBI file is missing.
+TEST(PacBioIndexTest, ThrowOnNonExistentPbiFile)
+{
+    const std::string missingFn = "does_not_exist.pbi";
+    EXPECT_THROW(PbiRawData raw(missingFn), std::exception);
+    EXPECT_THROW(PbiIndex idx(missingFn),   std::exception);
+}
+
+// Both index flavors are expected to throw when handed a file that is not a PBI.
+TEST(PacBioIndexTest, ThrowOnNonPbiFile)
+{
+    // case 1: completely wrong format (FASTA)
+    {
+        const std::string fastaFn = tests::Data_Dir + "/lambdaNEB.fa";
+        EXPECT_THROW(PbiRawData raw(fastaFn), std::exception);
+        EXPECT_THROW(PbiIndex idx(fastaFn),   std::exception);
+    }
+
+    // case 2: BGZF file, but not PBI
+    {
+        const std::string bamFn = tests::Data_Dir + "/ex2.bam";
+        EXPECT_THROW(PbiRawData raw(bamFn), std::exception);
+        EXPECT_THROW(PbiIndex idx(bamFn),   std::exception);
+    }
+}
+
+// Exercises PbiIndex copy/move construction and copy/move assignment; every
+// resulting object must compare equal to an index loaded from the same file.
+TEST(PacBioIndexTest, Copy_and_Move)
+{
+    const string pbiFn = test2BamFn + ".pbi";
+    const PbiIndex original(pbiFn);
+
+    // --- construction ---
+    const PbiIndex copyConstructed(original);
+
+    // std::move on the temporary is deliberate here, hence the silenced clang warning
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpessimizing-move"
+#endif
+    const PbiIndex moveConstructed(std::move(PbiIndex(pbiFn)));
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+    // --- assignment ---
+    PbiIndex copyAssigned;
+    copyAssigned = original;
+
+    PbiIndex moveAssigned;
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpessimizing-move"
+#endif
+    moveAssigned = std::move(PbiIndex(pbiFn));
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+    // --- all four must match the original ---
+    EXPECT_TRUE(tests::PbiIndicesEqual(original, copyConstructed));
+    EXPECT_TRUE(tests::PbiIndicesEqual(original, moveConstructed));
+    EXPECT_TRUE(tests::PbiIndicesEqual(original, copyAssigned));
+    EXPECT_TRUE(tests::PbiIndicesEqual(original, moveAssigned));
+}
+
+// Checks OrderedLookup<int>::LookupIndices for each comparison type, using a
+// small key -> row-indices table that covers rows 0..9.
+TEST(PacBioIndexTest, OrderedLookup)
+{
+    using PacBio::BAM::IndexList;
+    using PacBio::BAM::OrderedLookup;
+
+    // key -> rows holding that key
+    OrderedLookup<int>::container_type table;
+    table[11] = { 0, 3, 4 };
+    table[20] = { 1 };
+    table[42] = { 2, 7, 8 };
+    table[10] = { 5 };
+    table[12] = { 6 };
+    table[99] = { 9 };
+
+    OrderedLookup<int> lookup(table);
+
+    // ---- EQUAL ----
+    EXPECT_EQ(IndexList({5}),       lookup.LookupIndices(10, Compare::EQUAL));
+    EXPECT_EQ(IndexList({0, 3, 4}), lookup.LookupIndices(11, Compare::EQUAL));
+    EXPECT_EQ(IndexList({6}),       lookup.LookupIndices(12, Compare::EQUAL));
+    EXPECT_EQ(IndexList({1}),       lookup.LookupIndices(20, Compare::EQUAL));
+    EXPECT_EQ(IndexList({2, 7, 8}), lookup.LookupIndices(42, Compare::EQUAL));
+    EXPECT_EQ(IndexList({9}),       lookup.LookupIndices(99, Compare::EQUAL));
+    EXPECT_EQ(IndexList(),          lookup.LookupIndices(66, Compare::EQUAL)); // key is absent
+
+    // ---- NOT_EQUAL ----
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 6, 7, 8, 9}),    lookup.LookupIndices(10, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({1, 2, 5, 6, 7, 8, 9}),          lookup.LookupIndices(11, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 7, 8, 9}),    lookup.LookupIndices(12, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 2, 3, 4, 5, 6, 7, 8, 9}),    lookup.LookupIndices(20, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 3, 4, 5, 6, 9}),          lookup.LookupIndices(42, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8}),    lookup.LookupIndices(99, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), lookup.LookupIndices(66, Compare::NOT_EQUAL)); // key is absent
+
+    // ---- LESS_THAN ----
+    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), lookup.LookupIndices(13, Compare::LESS_THAN));
+    EXPECT_EQ(IndexList({0, 3, 4, 5}),    lookup.LookupIndices(12, Compare::LESS_THAN));
+    // TODO: more checks
+
+    // ---- LESS_THAN_EQUAL ----
+    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), lookup.LookupIndices(13, Compare::LESS_THAN_EQUAL));
+    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), lookup.LookupIndices(12, Compare::LESS_THAN_EQUAL));
+    // TODO: more checks
+
+    // ---- GREATER_THAN ----
+    EXPECT_EQ(IndexList({2,7,8,9}), lookup.LookupIndices(41, Compare::GREATER_THAN));
+    EXPECT_EQ(IndexList({9}),       lookup.LookupIndices(42, Compare::GREATER_THAN));
+    // TODO: more checks
+
+    // ---- GREATER_THAN_EQUAL ----
+    EXPECT_EQ(IndexList({2,7,8,9}), lookup.LookupIndices(41, Compare::GREATER_THAN_EQUAL));
+    EXPECT_EQ(IndexList({2,7,8,9}), lookup.LookupIndices(42, Compare::GREATER_THAN_EQUAL));
+    // TODO: more checks
+}
+
+// Checks UnorderedLookup<int>::LookupIndices for each comparison type, using
+// the same key -> row-indices table as the ordered variant (rows 0..9).
+TEST(PacBioIndexTest, UnorderedLookup)
+{
+    using PacBio::BAM::IndexList;
+    using PacBio::BAM::UnorderedLookup;
+
+    // key -> rows holding that key
+    UnorderedLookup<int>::container_type table;
+    table[11] = { 0, 3, 4 };
+    table[20] = { 1 };
+    table[42] = { 2, 7, 8 };
+    table[10] = { 5 };
+    table[12] = { 6 };
+    table[99] = { 9 };
+
+    UnorderedLookup<int> lookup(table);
+
+    // ---- EQUAL ----
+    EXPECT_EQ(IndexList({5}),       lookup.LookupIndices(10, Compare::EQUAL));
+    EXPECT_EQ(IndexList({0, 3, 4}), lookup.LookupIndices(11, Compare::EQUAL));
+    EXPECT_EQ(IndexList({6}),       lookup.LookupIndices(12, Compare::EQUAL));
+    EXPECT_EQ(IndexList({1}),       lookup.LookupIndices(20, Compare::EQUAL));
+    EXPECT_EQ(IndexList({2, 7, 8}), lookup.LookupIndices(42, Compare::EQUAL));
+    EXPECT_EQ(IndexList({9}),       lookup.LookupIndices(99, Compare::EQUAL));
+    EXPECT_EQ(IndexList(),          lookup.LookupIndices(66, Compare::EQUAL)); // key is absent
+
+    // ---- NOT_EQUAL ----
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 6, 7, 8, 9}),    lookup.LookupIndices(10, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({1, 2, 5, 6, 7, 8, 9}),          lookup.LookupIndices(11, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 7, 8, 9}),    lookup.LookupIndices(12, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 2, 3, 4, 5, 6, 7, 8, 9}),    lookup.LookupIndices(20, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 3, 4, 5, 6, 9}),          lookup.LookupIndices(42, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8}),    lookup.LookupIndices(99, Compare::NOT_EQUAL));
+    EXPECT_EQ(IndexList({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), lookup.LookupIndices(66, Compare::NOT_EQUAL)); // key is absent
+
+    // ---- LESS_THAN ----
+    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), lookup.LookupIndices(13, Compare::LESS_THAN));
+    EXPECT_EQ(IndexList({0, 3, 4, 5}),    lookup.LookupIndices(12, Compare::LESS_THAN));
+    // TODO: more checks
+
+    // ---- LESS_THAN_EQUAL ----
+    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), lookup.LookupIndices(13, Compare::LESS_THAN_EQUAL));
+    EXPECT_EQ(IndexList({0, 3, 4, 5, 6}), lookup.LookupIndices(12, Compare::LESS_THAN_EQUAL));
+    // TODO: more checks
+
+    // ---- GREATER_THAN ----
+    EXPECT_EQ(IndexList({2,7,8,9}), lookup.LookupIndices(41, Compare::GREATER_THAN));
+    EXPECT_EQ(IndexList({9}),       lookup.LookupIndices(42, Compare::GREATER_THAN));
+    // TODO: more checks
+
+    // ---- GREATER_THAN_EQUAL ----
+    EXPECT_EQ(IndexList({2,7,8,9}), lookup.LookupIndices(41, Compare::GREATER_THAN_EQUAL));
+    EXPECT_EQ(IndexList({2,7,8,9}), lookup.LookupIndices(42, Compare::GREATER_THAN_EQUAL));
+    // TODO: more checks
+}
+
+// Checks that mergedIndexBlocks collapses lookup results into the expected
+// (firstIndex, numReads) blocks, for EQUAL and NOT_EQUAL queries.
+TEST(PacBioIndexTest, MergeBlocks)
+{
+    using PacBio::BAM::IndexList;
+    using PacBio::BAM::IndexResultBlock;
+    using PacBio::BAM::IndexResultBlocks;
+    using PacBio::BAM::mergedIndexBlocks;
+    using PacBio::BAM::OrderedLookup;
+
+    // key -> rows holding that key
+    OrderedLookup<int>::container_type table;
+    table[11] = { 0, 3, 4 };
+    table[20] = { 1 };
+    table[42] = { 2, 7, 8 };
+    table[10] = { 5 };
+    table[12] = { 6 };
+    table[99] = { 9 };
+
+    OrderedLookup<int> lookup(table);
+
+    // small shorthands for "look up, then merge"
+    const auto blocksEqualTo = [&lookup](const int key)
+    { return mergedIndexBlocks(lookup.LookupIndices(key, Compare::EQUAL)); };
+    const auto blocksNotEqualTo = [&lookup](const int key)
+    { return mergedIndexBlocks(lookup.LookupIndices(key, Compare::NOT_EQUAL)); };
+
+    // ---- EQUAL ----
+    auto merged = blocksEqualTo(10);
+    EXPECT_EQ(1, merged.size());
+    EXPECT_EQ(IndexResultBlock(5, 1), merged.at(0));
+
+    merged = blocksEqualTo(11);
+    EXPECT_EQ(2, merged.size());
+    EXPECT_EQ(IndexResultBlock(0, 1), merged.at(0));
+    EXPECT_EQ(IndexResultBlock(3, 2), merged.at(1));
+
+    merged = blocksEqualTo(12);
+    EXPECT_EQ(1, merged.size());
+    EXPECT_EQ(IndexResultBlock(6, 1), merged.at(0));
+
+    merged = blocksEqualTo(20);
+    EXPECT_EQ(1, merged.size());
+    EXPECT_EQ(IndexResultBlock(1, 1), merged.at(0));
+
+    merged = blocksEqualTo(42);
+    EXPECT_EQ(2, merged.size());
+    EXPECT_EQ(IndexResultBlock(2, 1), merged.at(0));
+    EXPECT_EQ(IndexResultBlock(7, 2), merged.at(1));
+
+    merged = blocksEqualTo(99);
+    EXPECT_EQ(1, merged.size());
+    EXPECT_EQ(IndexResultBlock(9, 1), merged.at(0));
+
+    merged = blocksEqualTo(66); // key is absent
+    EXPECT_TRUE(merged.empty());
+
+    // ---- NOT_EQUAL ----
+    merged = blocksNotEqualTo(10);
+    EXPECT_EQ(2, merged.size());
+    EXPECT_EQ(IndexResultBlock(0, 5), merged.at(0));
+    EXPECT_EQ(IndexResultBlock(6, 4), merged.at(1));
+
+    merged = blocksNotEqualTo(11);
+    EXPECT_EQ(2, merged.size());
+    EXPECT_EQ(IndexResultBlock(1, 2), merged.at(0));
+    EXPECT_EQ(IndexResultBlock(5, 5), merged.at(1));
+
+    merged = blocksNotEqualTo(12);
+    EXPECT_EQ(2, merged.size());
+    EXPECT_EQ(IndexResultBlock(0, 6), merged.at(0));
+    EXPECT_EQ(IndexResultBlock(7, 3), merged.at(1));
+
+    merged = blocksNotEqualTo(20);
+    EXPECT_EQ(2, merged.size());
+    EXPECT_EQ(IndexResultBlock(0, 1), merged.at(0));
+    EXPECT_EQ(IndexResultBlock(2, 8), merged.at(1));
+
+    merged = blocksNotEqualTo(42);
+    EXPECT_EQ(3, merged.size());
+    EXPECT_EQ(IndexResultBlock(0, 2), merged.at(0));
+    EXPECT_EQ(IndexResultBlock(3, 4), merged.at(1));
+    EXPECT_EQ(IndexResultBlock(9, 1), merged.at(2));
+
+    merged = blocksNotEqualTo(99);
+    EXPECT_EQ(1, merged.size());
+    EXPECT_EQ(IndexResultBlock(0, 9), merged.at(0));
+
+    merged = blocksNotEqualTo(66); // key is absent
+    EXPECT_EQ(1, merged.size());
+    EXPECT_EQ(IndexResultBlock(0, 10), merged.at(0));
+}
+
+// Checks that BasicLookupData::ApplyOffsets fills in each merged block's
+// virtualOffset_ from the fileOffset_ table while leaving numReads_ untouched.
+TEST(PacBioIndexTest, ApplyOffsetsToBlocks)
+{
+    using PacBio::BAM::BasicLookupData;
+    using PacBio::BAM::IndexList;
+    using PacBio::BAM::IndexResultBlock;
+    using PacBio::BAM::IndexResultBlocks;
+    using PacBio::BAM::mergedIndexBlocks;
+    using PacBio::BAM::OrderedLookup;
+
+    // key -> rows holding that key
+    OrderedLookup<int>::container_type table;
+    table[11] = { 0, 3, 4 };
+    table[20] = { 1 };
+    table[42] = { 2, 7, 8 };
+    table[10] = { 5 };
+    table[12] = { 6 };
+    table[99] = { 9 };
+
+    OrderedLookup<int> lookup(table);
+    auto blocks = mergedIndexBlocks(lookup.LookupIndices(10, Compare::NOT_EQUAL));
+
+    // before offsets: two blocks, split around row 5
+    EXPECT_EQ(2, blocks.size());
+    EXPECT_EQ(IndexResultBlock(0, 5), blocks.at(0));
+    EXPECT_EQ(IndexResultBlock(6, 4), blocks.at(1));
+
+    // row i lives at offset i*10
+    BasicLookupData basicData;
+    basicData.fileOffset_ = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 };
+    basicData.ApplyOffsets(blocks);
+
+    // after offsets: same blocks, now carrying the offset of their first row
+    EXPECT_EQ(2, blocks.size());
+
+    const IndexResultBlock& first = blocks.at(0);
+    EXPECT_EQ(0, first.virtualOffset_);
+    EXPECT_EQ(5, first.numReads_);
+
+    const IndexResultBlock& second = blocks.at(1);
+    EXPECT_EQ(60, second.virtualOffset_);
+    EXPECT_EQ(4,  second.numReads_);
+}
+
+// Checks BasicLookupData::IndicesMulti with a read-group whitelist, then the
+// merged/offset-resolved blocks derived from the returned indices.
+TEST(PacBioIndexTest, LookupMulti)
+{
+    using PacBio::BAM::BasicLookupData;
+    using PacBio::BAM::IndexList;
+    using PacBio::BAM::IndexResultBlock;
+    using PacBio::BAM::IndexResultBlocks;
+    using PacBio::BAM::mergedIndexBlocks;
+    using PacBio::BAM::UnorderedLookup;
+
+    // read-group ID -> rows holding that ID
+    UnorderedLookup<int32_t>::container_type table;
+    table[11] = { 0, 3, 4 };
+    table[20] = { 1 };
+    table[42] = { 2, 7, 8 };
+    table[10] = { 5 };
+    table[12] = { 6 };
+    table[99] = { 9 };
+
+    // row i lives at offset i*10
+    BasicLookupData basicData;
+    basicData.rgId_       = UnorderedLookup<int32_t>(table);
+    basicData.fileOffset_ = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90 };
+
+    const std::vector<int32_t> whitelist = { 11, 42, 20 };
+    const auto indices = basicData.IndicesMulti(BasicLookupData::RG_ID, whitelist);
+
+    IndexResultBlocks blocks = mergedIndexBlocks(indices);
+    basicData.ApplyOffsets(blocks);
+
+    // raw indices come back grouped in whitelist order
+    EXPECT_EQ(IndexList({0, 3, 4, 2, 7, 8, 1}), indices);
+    EXPECT_EQ(2, blocks.size());
+
+    // rows 0-4
+    EXPECT_EQ(0, blocks.at(0).firstIndex_);
+    EXPECT_EQ(5, blocks.at(0).numReads_);
+    EXPECT_EQ(0, blocks.at(0).virtualOffset_);
+
+    // rows 7-8
+    EXPECT_EQ(7,  blocks.at(1).firstIndex_);
+    EXPECT_EQ(2,  blocks.at(1).numReads_);
+    EXPECT_EQ(70, blocks.at(1).virtualOffset_);
+}
+
+// End-to-end checks of the lookup API on the test BAM's PBI: read-group,
+// reference-coordinate, strand and barcode queries, each merged into blocks
+// and resolved to virtual offsets.
+TEST(PacBioIndexTest, LookupAPI)
+{
+    const PbiIndex index(test2BamFn + ".pbi");
+    const BasicLookupData&   basic   = index.BasicData();
+    const MappedLookupData&  mapped  = index.MappedData();
+    const BarcodeLookupData& barcode = index.BarcodeData();
+
+    // rgId == x
+    IndexResultBlocks rgMatch = mergedIndexBlocks(basic.Indices(BasicLookupData::RG_ID, -1197849594));
+    basic.ApplyOffsets(rgMatch);
+    EXPECT_EQ(1, rgMatch.size());
+    EXPECT_EQ(0,        rgMatch.at(0).firstIndex_);
+    EXPECT_EQ(10,       rgMatch.at(0).numReads_);
+    EXPECT_EQ(33816576, rgMatch.at(0).virtualOffset_);
+
+    // rgId != x
+    IndexResultBlocks rgMismatch = mergedIndexBlocks(basic.Indices(BasicLookupData::RG_ID,
+                                                                   -1197849594,
+                                                                   Compare::NOT_EQUAL));
+    basic.ApplyOffsets(rgMismatch);
+    EXPECT_TRUE(rgMismatch.empty());
+
+    // tEnd <= x
+    IndexResultBlocks tEndLte = mergedIndexBlocks(mapped.Indices(MappedLookupData::T_END,
+                                                                 4500,
+                                                                 Compare::LESS_THAN_EQUAL));
+    basic.ApplyOffsets(tEndLte);
+    EXPECT_EQ(1, tEndLte.size());
+    EXPECT_EQ(0,        tEndLte.at(0).firstIndex_);
+    EXPECT_EQ(5,        tEndLte.at(0).numReads_);
+    EXPECT_EQ(33816576, tEndLte.at(0).virtualOffset_);
+
+    // tStart >= x
+    IndexResultBlocks tStartGte = mergedIndexBlocks(mapped.Indices(MappedLookupData::T_START,
+                                                                   4500,
+                                                                   Compare::GREATER_THAN_EQUAL));
+    basic.ApplyOffsets(tStartGte);
+    EXPECT_EQ(1, tStartGte.size());
+    EXPECT_EQ(6,        tStartGte.at(0).firstIndex_);
+    EXPECT_EQ(4,        tStartGte.at(0).numReads_);
+    EXPECT_EQ(33849818, tStartGte.at(0).virtualOffset_);
+
+    // strand query: forward reads
+    IndexResultBlocks forward = mergedIndexBlocks(mapped.Indices(MappedLookupData::STRAND,
+                                                                 Strand::FORWARD));
+    basic.ApplyOffsets(forward);
+    EXPECT_EQ(5, forward.size());
+
+    EXPECT_EQ(0,        forward.at(0).firstIndex_);
+    EXPECT_EQ(1,        forward.at(0).numReads_);
+    EXPECT_EQ(33816576, forward.at(0).virtualOffset_);
+
+    EXPECT_EQ(2,        forward.at(1).firstIndex_);
+    EXPECT_EQ(1,        forward.at(1).numReads_);
+    EXPECT_EQ(33831333, forward.at(1).virtualOffset_);
+
+    EXPECT_EQ(4,        forward.at(2).firstIndex_);
+    EXPECT_EQ(1,        forward.at(2).numReads_);
+    EXPECT_EQ(33836542, forward.at(2).virtualOffset_);
+
+    EXPECT_EQ(7,        forward.at(3).firstIndex_);
+    EXPECT_EQ(1,        forward.at(3).numReads_);
+    EXPECT_EQ(33863499, forward.at(3).virtualOffset_);
+
+    EXPECT_EQ(9,          forward.at(4).firstIndex_);
+    EXPECT_EQ(1,          forward.at(4).numReads_);
+    EXPECT_EQ(1392836608, forward.at(4).virtualOffset_);
+
+    // strand query: reverse reads
+    // per-row reverse-strand flags: 0,1,0,1,0,1,1,0,1,0
+    IndexResultBlocks reverse = mergedIndexBlocks(mapped.Indices(MappedLookupData::STRAND,
+                                                                 Strand::REVERSE));
+    basic.ApplyOffsets(reverse);
+    EXPECT_EQ(4, reverse.size());
+
+    EXPECT_EQ(1,        reverse.at(0).firstIndex_);
+    EXPECT_EQ(1,        reverse.at(0).numReads_);
+    EXPECT_EQ(33825163, reverse.at(0).virtualOffset_);
+
+    EXPECT_EQ(3,        reverse.at(1).firstIndex_);
+    EXPECT_EQ(1,        reverse.at(1).numReads_);
+    EXPECT_EQ(33834264, reverse.at(1).virtualOffset_);
+
+    EXPECT_EQ(5,        reverse.at(2).firstIndex_);
+    EXPECT_EQ(2,        reverse.at(2).numReads_);
+    EXPECT_EQ(33838065, reverse.at(2).virtualOffset_);
+
+    EXPECT_EQ(8,        reverse.at(3).firstIndex_);
+    EXPECT_EQ(1,        reverse.at(3).numReads_);
+    EXPECT_EQ(33874621, reverse.at(3).virtualOffset_);
+
+    // query data field that is not in the PBI
+    IndexResultBlocks absent = mergedIndexBlocks(barcode.Indices(BarcodeLookupData::BC_QUALITY,
+                                                                 77,
+                                                                 Compare::GREATER_THAN));
+    basic.ApplyOffsets(absent);
+    EXPECT_TRUE(absent.empty());
+}
+
+// Queries the bam_mapping PBI for reads with ZMW < 20000 and checks the
+// number of merged blocks plus the first five blocks in detail.
+TEST(PacBioIndexTest, LookupByZmw)
+{
+    BamFile bamFile(tests::Data_Dir + "/dataset/bam_mapping.bam");
+    bamFile.EnsurePacBioIndexExists();
+
+    const PbiIndex index(bamFile.PacBioIndexFilename());
+    const BasicLookupData& basic = index.BasicData();
+
+    IndexResultBlocks hits = mergedIndexBlocks(basic.Indices(BasicLookupData::ZMW,
+                                                             20000,
+                                                             Compare::LESS_THAN));
+    basic.ApplyOffsets(hits);
+    EXPECT_EQ(14, hits.size());
+
+    //
+    // we'll take a look at first 5 contiguous blocks of reads with ZMW < 20000
+    //
+    // skipped: { 49050, 32328, 32328 }
+    // block0:  { 6469,  6469  }
+    // skipped: { 30983 }
+    // block1:  { 13473, 13473, 19915 }
+    // skipped: { 30983 }
+    // block2:  { 19915, 7247, 7247 }
+    // skipped: { 38025 }
+    // block3:  { 13473 }
+    // skipped: { 36363, 36363, 31174, 31174, 38025, 50257, 50257 }
+    // block4:  { 14743, 14743 }
+    //
+
+    // block0
+    EXPECT_EQ(3,        hits.at(0).firstIndex_);
+    EXPECT_EQ(2,        hits.at(0).numReads_);
+    EXPECT_EQ(32654529, hits.at(0).virtualOffset_);
+
+    // block1
+    EXPECT_EQ(6,        hits.at(1).firstIndex_);
+    EXPECT_EQ(3,        hits.at(1).numReads_);
+    EXPECT_EQ(32669996, hits.at(1).virtualOffset_);
+
+    // block2
+    EXPECT_EQ(10,         hits.at(2).firstIndex_);
+    EXPECT_EQ(3,          hits.at(2).numReads_);
+    EXPECT_EQ(1388841957, hits.at(2).virtualOffset_);
+
+    // block3
+    EXPECT_EQ(14,         hits.at(3).firstIndex_);
+    EXPECT_EQ(1,          hits.at(3).numReads_);
+    EXPECT_EQ(1388864866, hits.at(3).virtualOffset_);
+
+    // block4
+    EXPECT_EQ(22,         hits.at(4).firstIndex_);
+    EXPECT_EQ(2,          hits.at(4).numReads_);
+    EXPECT_EQ(1388892121, hits.at(4).virtualOffset_);
+}
+
+// Queries the bam_mapping PBI with a two-entry ZMW whitelist and checks the
+// three merged, offset-resolved blocks that come back.
+TEST(PacBioIndexTest, LookupMultiZmw)
+{
+    BamFile bamFile(tests::Data_Dir + "/dataset/bam_mapping.bam");
+    bamFile.EnsurePacBioIndexExists();
+
+    const PbiIndex index(bamFile.PacBioIndexFilename());
+    const BasicLookupData& basic = index.BasicData();
+
+    const std::vector<int32_t> zmwWhitelist = { 13473, 38025 };
+    IndexResultBlocks hits = mergedIndexBlocks(basic.IndicesMulti(BasicLookupData::ZMW, zmwWhitelist));
+    basic.ApplyOffsets(hits);
+
+    EXPECT_EQ(3, hits.size());
+
+    // block 0
+    EXPECT_EQ(6,        hits.at(0).firstIndex_);
+    EXPECT_EQ(2,        hits.at(0).numReads_);
+    EXPECT_EQ(32669996, hits.at(0).virtualOffset_);
+
+    // block 1
+    EXPECT_EQ(13,         hits.at(1).firstIndex_);
+    EXPECT_EQ(2,          hits.at(1).numReads_);
+    EXPECT_EQ(1388851626, hits.at(1).virtualOffset_);
+
+    // block 2
+    EXPECT_EQ(19,         hits.at(2).firstIndex_);
+    EXPECT_EQ(1,          hits.at(2).numReads_);
+    EXPECT_EQ(1388881468, hits.at(2).virtualOffset_);
+}
+
+// Builds one raw index from a DataSet of three BAM files and spot-checks the
+// aggregated metadata, per-row file numbers, and basic/mapped/barcode values.
+TEST(PacBioIndexTest, AggregatePBI)
+{
+    DataSet ds;
+    ExternalResources& resources = ds.ExternalResources();
+    resources.Add(BamFile{tests::Data_Dir + "/aligned.bam"});                            // 4 reads, BASIC | MAPPED | REFERENCE
+    resources.Add(BamFile{tests::Data_Dir + "/polymerase/production.subreads.bam"});     // 8 reads, BASIC | BARCODE
+    resources.Add(BamFile{tests::Data_Dir + "/polymerase/production_hq.hqregion.bam"});  // 1 read,  BASIC only
+
+    const PbiRawData index{ds};
+    const PbiRawBasicData&   basic   = index.BasicData();
+    const PbiRawBarcodeData& barcode = index.BarcodeData();
+    const PbiRawMappedData&  mapped  = index.MappedData();
+
+    const uint32_t expectedTotal = 13; // 4 + 8 + 1
+
+    // 'meta' info
+    EXPECT_EQ(expectedTotal, index.NumReads());
+    EXPECT_EQ(PbiFile::BASIC | PbiFile::MAPPED | PbiFile::BARCODE, index.FileSections());
+    EXPECT_TRUE(index.HasBarcodeData());
+    EXPECT_TRUE(index.HasMappedData());
+    EXPECT_FALSE(index.HasReferenceData());
+
+    // file numbers: rows 0-3 -> file 0, rows 4-11 -> file 1, row 12 -> file 2
+    const vector<int> expectedFileNumbers = { 0, 0, 0, 0,  1, 1, 1, 1, 1, 1, 1, 1,  2 };
+    for (size_t row = 0; row < expectedFileNumbers.size(); ++row)
+        EXPECT_EQ(expectedFileNumbers.at(row), basic.fileNumber_.at(row));
+
+    // basic data - query starts
+    EXPECT_EQ(0,    basic.qStart_.at(0));  // file 1
+    EXPECT_EQ(0,    basic.qStart_.at(1));
+    EXPECT_EQ(2659, basic.qStart_.at(4));  // file 2
+    EXPECT_EQ(3116, basic.qStart_.at(5));
+    EXPECT_EQ(2659, basic.qStart_.at(12)); // file 3
+
+    // basic data - file offsets
+    EXPECT_EQ(21102592, basic.fileOffset_.at(0));  // file 1
+    EXPECT_EQ(21102883, basic.fileOffset_.at(1));
+    EXPECT_EQ(19857408, basic.fileOffset_.at(4));  // file 2
+    EXPECT_EQ(19860696, basic.fileOffset_.at(5));
+    EXPECT_EQ(20054016, basic.fileOffset_.at(12)); // file 3
+
+    // mapped data
+    EXPECT_EQ(60,  mapped.mapQV_.at(0));  // file 1
+    EXPECT_EQ(60,  mapped.mapQV_.at(1));
+    EXPECT_EQ(255, mapped.mapQV_.at(4));  // file 2
+    EXPECT_EQ(255, mapped.mapQV_.at(5));
+    EXPECT_EQ(255, mapped.mapQV_.at(12)); // file 3
+
+    // barcode data
+    EXPECT_EQ(-1, barcode.bcForward_.at(0));  // file 1
+    EXPECT_EQ(-1, barcode.bcForward_.at(1));
+    EXPECT_EQ(92, barcode.bcForward_.at(4));  // file 2
+    EXPECT_EQ(92, barcode.bcForward_.at(5));
+    EXPECT_EQ(-1, barcode.bcForward_.at(12)); // file 3
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.\r
+//\r
+// All rights reserved.\r
+//\r
+// Redistribution and use in source and binary forms, with or without\r
+// modification, are permitted (subject to the limitations in the\r
+// disclaimer below) provided that the following conditions are met:\r
+//\r
+// * Redistributions of source code must retain the above copyright\r
+// notice, this list of conditions and the following disclaimer.\r
+//\r
+// * Redistributions in binary form must reproduce the above\r
+// copyright notice, this list of conditions and the following\r
+// disclaimer in the documentation and/or other materials provided\r
+// with the distribution.\r
+//\r
+// * Neither the name of Pacific Biosciences nor the names of its\r
+// contributors may be used to endorse or promote products derived\r
+// from this software without specific prior written permission.\r
+//\r
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE\r
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC\r
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED\r
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\r
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\r
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS\r
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\r
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF\r
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\r
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\r
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT\r
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
+// SUCH DAMAGE.\r
+\r
+// Author: Derek Barnett\r
+\r
+#ifdef PBBAM_TESTING\r
+#define private public\r
+#endif\r
+\r
+#include "TestData.h"\r
+#include <gtest/gtest.h>\r
+#include <pbbam/PbiFilter.h>\r
+#include <string>\r
+#include <cstdio>\r
+#include <cstdlib>\r
+using namespace PacBio;\r
+using namespace PacBio::BAM;\r
+using namespace std;\r
+\r
+namespace PacBio {\r
+namespace BAM {\r
+namespace tests {\r
+\r
+// helper structs & methods\r
+\r
+// Builds a hand-written, 4-read raw index fixture with basic, mapped,\r
+// barcode and reference sections populated.\r
+static\r
+PbiRawData test2Bam_RawIndex(void)\r
+{\r
+    PbiRawData index;\r
+    index.NumReads(4);\r
+\r
+    // basic (per-read) section\r
+    PbiRawBasicData& basic = index.BasicData();\r
+    basic.rgId_       = { -1197849594, -1197849594, -1197849594, -1197849594 };\r
+    basic.qStart_     = { 2114, 2579, 4101, 5615 };\r
+    basic.qEnd_       = { 2531, 4055, 5571, 6237 };\r
+    basic.holeNumber_ = { 14743, 14743, 14743, 14743 };\r
+    basic.readQual_   = { 0.901, 0.601, 0.901, 0.601 };\r
+    basic.ctxtFlag_   = { 0, 1, 2, 3 };\r
+    basic.fileOffset_ = { 35651584, 35655125, 35667128, 35679170 };\r
+\r
+    // mapped section\r
+    PbiRawMappedData& mapped = index.mappedData_;\r
+    mapped.tId_       = { 0, 0, 0, 0 };\r
+    mapped.tStart_    = { 9507, 8453, 8455, 9291 };\r
+    mapped.tEnd_      = { 9903, 9902, 9893, 9900 };\r
+    mapped.aStart_    = { 2130, 2581, 4102, 5619 };\r
+    mapped.aEnd_      = { 2531, 4055, 5560, 6237 };\r
+    mapped.revStrand_ = { 0, 1, 0, 1 };\r
+    mapped.mapQV_     = { 254, 254, 254, 254 };\r
+    mapped.nM_        = { 384, 1411, 1393, 598 };\r
+    mapped.nMM_       = { 0, 0, 0, 0 };\r
+\r
+    // barcode section\r
+    PbiRawBarcodeData& barcodes = index.barcodeData_;\r
+    barcodes.bcForward_ = { 0, 17, 256, 17 };\r
+    barcodes.bcReverse_ = { 1, 18, 257, 18 };\r
+    barcodes.bcQual_    = { 42, 80, 42, 110 };\r
+\r
+    // reference section: three entries, appended in this order\r
+    PbiRawReferenceData& references = index.referenceData_;\r
+    references.entries_.emplace_back( 0, 0, 3 );\r
+    references.entries_.emplace_back( 1 );\r
+    references.entries_.emplace_back( PbiReferenceEntry::UNMAPPED_ID );\r
+\r
+    return index;\r
+}\r
+\r
+// Index used by checkFilterRows() and FromDataSetOk; built once from test2Bam_RawIndex().\r
+static const PbiRawData shared_index = test2Bam_RawIndex();\r
+\r
+// Expects `filter` to accept every row listed in `expectedRows` when applied\r
+// to shared_index.\r
+//\r
+// Only acceptance is checked: rows that are not listed are never examined, so\r
+// an empty `expectedRows` asserts nothing.\r
+//\r
+// The row list is taken by const reference to avoid copying the vector on\r
+// every call, and the failing row is streamed into the assertion message.\r
+static\r
+void checkFilterRows(const PbiFilter& filter, const std::vector<size_t>& expectedRows)\r
+{\r
+    for (const size_t row : expectedRows) {\r
+        EXPECT_TRUE(filter.Accepts(shared_index, row)) << "row: " << row;\r
+    }\r
+}\r
+\r
+// Checks a filter's private composition state and then its accepted rows.\r
+//\r
+//   expectedType        - composition type expected in filter.d_->type_\r
+//   expectedNumChildren - expected size of filter.d_->filters_\r
+//   expectedRows        - rows forwarded to checkFilterRows()\r
+//\r
+// Reads the private `d_` member, which this file reaches via the\r
+// `#define private public` guarded by PBBAM_TESTING.\r
+// The row list is taken by const reference to avoid a needless vector copy.\r
+static\r
+void checkFilterInternals(const PbiFilter& filter,\r
+                          const PbiFilter::CompositionType expectedType,\r
+                          const size_t expectedNumChildren,\r
+                          const std::vector<size_t>& expectedRows)\r
+{\r
+    EXPECT_EQ(expectedType,        filter.d_->type_);\r
+    EXPECT_EQ(expectedNumChildren, filter.d_->filters_.size());\r
+    checkFilterRows(filter, expectedRows);\r
+}\r
+\r
+// Minimal custom filter: provides the Accepts() member and accepts every row.\r
+struct SimpleFilter\r
+{\r
+    // Both arguments are ignored; the result is always true.\r
+    bool Accepts(const PbiRawData& /*idx*/, const size_t /*row*/) const\r
+    {\r
+        return true;\r
+    }\r
+};\r
+\r
+// Has no Accepts() member; referenced only by the commented-out "should not compile" lines in CustomFilterOk.\r
+struct NoncompliantFilter { };\r
+\r
+// Custom filter that ignores the index and accepts a fixed set of row numbers.\r
+struct SortUniqueTestFilter\r
+{\r
+    // Accepts rows 0, 1, 2, 3, 4, 7 and 8; rejects everything else.\r
+    bool Accepts(const PbiRawData& /*idx*/, const size_t row) const\r
+    {\r
+        // row is unsigned, so `row <= 4` covers exactly 0..4\r
+        return row <= 4 || row == 7 || row == 8;\r
+    }\r
+};\r
+\r
+// Second fixed-row custom filter, paired with SortUniqueTestFilter in the\r
+// union/intersection tests.\r
+struct SortUniqueTestFilter2\r
+{\r
+    // Accepts rows 3, 5 and 7; rejects everything else. The index is ignored.\r
+    bool Accepts(const PbiRawData& /*idx*/, const size_t row) const\r
+    {\r
+        return row == 3 || row == 5 || row == 7;\r
+    }\r
+};\r
+\r
+// Returns a temporary, default-constructed PbiFilter (used as a move source in MoveOk).\r
+static inline\r
+PbiFilter emptyFilter(void)\r
+{\r
+    return PbiFilter{ };\r
+}\r
+\r
+// Returns a temporary PbiFilter built from a single SimpleFilter child.\r
+static inline\r
+PbiFilter simpleFilter(void)\r
+{\r
+    return PbiFilter{ SimpleFilter{ } };\r
+}\r
+\r
+} // namespace tests\r
+} // namespace BAM\r
+} // namespace PacBio\r
+\r
+TEST(PbiFilterTest, DefaultCtorOk)\r
+{\r
+    // default-constructed filter: INTERSECT composition, no children,\r
+    // and all 4 rows of the shared index are accepted\r
+    auto defaultFilter = PbiFilter{ };\r
+    tests::checkFilterInternals(defaultFilter,\r
+                                PbiFilter::INTERSECT,\r
+                                0,\r
+                                std::vector<size_t>{0,1,2,3});\r
+}\r
+\r
+TEST(PbiFilterTest, CompositionOk)\r
+{\r
+    // adding one (empty) child filter yields exactly 1 child\r
+    auto parent = PbiFilter{ };\r
+    parent.Add(PbiFilter{ });\r
+    tests::checkFilterInternals(parent,\r
+                                PbiFilter::INTERSECT,\r
+                                1,\r
+                                std::vector<size_t>{0,1,2,3});\r
+}\r
+\r
+TEST(PbiFilterTest, CustomFilterOk)\r
+{\r
+    // A user-defined type with an Accepts() member can be wrapped either via\r
+    // the constructor or via Add(). Only composition state is checked here\r
+    // (the expected-row lists are empty).\r
+\r
+    {   // via constructor\r
+        auto fromCtor = PbiFilter{ tests::SimpleFilter{ } };\r
+        tests::checkFilterInternals(fromCtor, PbiFilter::INTERSECT, 1, std::vector<size_t>{});\r
+    }\r
+    {   // via Add()\r
+        auto fromAdd = PbiFilter{ };\r
+        fromAdd.Add(tests::SimpleFilter{ });\r
+        tests::checkFilterInternals(fromAdd, PbiFilter::INTERSECT, 1, std::vector<size_t>{});\r
+    }\r
+\r
+//    PbiFilter shouldNotCompile = PbiFilter{ tests::NoncompliantFilter{ } };                       // <-- when uncommented, should not compile\r
+//    PbiFilter shouldNotCompileEither; shouldNotCompileEither.Add(tests::NoncompliantFilter{ });   // <-- when uncommented, should not compile\r
+}\r
+\r
+TEST(PbiFilterTest, CopyOk)\r
+{\r
+    // Copy construction and copy assignment must leave source and copies\r
+    // with the same composition type and child count.\r
+\r
+    {   // source has no children\r
+        const auto source = PbiFilter{ };\r
+\r
+        PbiFilter viaCtor(source);\r
+        PbiFilter viaAssign;\r
+        viaAssign = source;\r
+\r
+        tests::checkFilterInternals(source,    PbiFilter::INTERSECT, 0, std::vector<size_t>{0,1,2,3});\r
+        tests::checkFilterInternals(viaCtor,   PbiFilter::INTERSECT, 0, std::vector<size_t>{0,1,2,3});\r
+        tests::checkFilterInternals(viaAssign, PbiFilter::INTERSECT, 0, std::vector<size_t>{0,1,2,3});\r
+    }\r
+    {   // source has one child\r
+        const auto source = PbiFilter{ tests::SimpleFilter{ } };\r
+\r
+        PbiFilter viaCtor(source);\r
+        PbiFilter viaAssign;\r
+        viaAssign = source;\r
+\r
+        tests::checkFilterInternals(source,    PbiFilter::INTERSECT, 1, std::vector<size_t>{});\r
+        tests::checkFilterInternals(viaCtor,   PbiFilter::INTERSECT, 1, std::vector<size_t>{});\r
+        tests::checkFilterInternals(viaAssign, PbiFilter::INTERSECT, 1, std::vector<size_t>{});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, MoveOk)\r
+{\r
+    // Move construction and move assignment from temporaries returned by the\r
+    // tests::emptyFilter() / tests::simpleFilter() helpers.\r
+\r
+    {   // no children\r
+        const auto reference = tests::emptyFilter();\r
+\r
+        PbiFilter viaCtor(tests::emptyFilter());\r
+        PbiFilter viaAssign;\r
+        viaAssign = tests::emptyFilter();\r
+\r
+        tests::checkFilterInternals(reference, PbiFilter::INTERSECT, 0, std::vector<size_t>{0,1,2,3});\r
+        tests::checkFilterInternals(viaCtor,   PbiFilter::INTERSECT, 0, std::vector<size_t>{0,1,2,3});\r
+        tests::checkFilterInternals(viaAssign, PbiFilter::INTERSECT, 0, std::vector<size_t>{0,1,2,3});\r
+    }\r
+    {   // one child\r
+        const auto reference = tests::simpleFilter();\r
+\r
+        PbiFilter viaCtor(tests::simpleFilter());\r
+        PbiFilter viaAssign;\r
+        viaAssign = tests::simpleFilter();\r
+\r
+        tests::checkFilterInternals(reference, PbiFilter::INTERSECT, 1, std::vector<size_t>{0,1,2,3});\r
+        tests::checkFilterInternals(viaCtor,   PbiFilter::INTERSECT, 1, std::vector<size_t>{0,1,2,3});\r
+        tests::checkFilterInternals(viaAssign, PbiFilter::INTERSECT, 1, std::vector<size_t>{0,1,2,3});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, SortsAndUniquesChildFilterResultsOk)\r
+{\r
+    const auto child   = tests::SortUniqueTestFilter{ };\r
+    const auto wrapper = PbiFilter{ child };\r
+\r
+    // the bare child is queried with rows in arbitrary order,\r
+    // the wrapping PbiFilter with the same rows in ascending order\r
+    tests::checkFilterRows(child,   std::vector<size_t>{2, 7, 0, 3, 4, 1, 8});\r
+    tests::checkFilterRows(wrapper, std::vector<size_t>{0, 1, 2, 3, 4, 7, 8});\r
+}\r
+\r
+TEST(PbiFilterTest, UnionOk)\r
+{\r
+    // --- union of empty filters ---\r
+    {\r
+        {   // operands are named objects (copied)\r
+            const auto lhs = tests::emptyFilter();\r
+            const auto rhs = tests::emptyFilter();\r
+            const auto combined = PbiFilter::Union({ lhs, rhs });\r
+            tests::checkFilterInternals(combined, PbiFilter::UNION, 2, std::vector<size_t>{0,1,2,3});\r
+        }\r
+        {   // operands are temporaries\r
+            const auto combined = PbiFilter::Union({ PbiFilter{ }, PbiFilter{ } });\r
+            tests::checkFilterInternals(combined, PbiFilter::UNION, 2, std::vector<size_t>{0,1,2,3});\r
+        }\r
+    }\r
+\r
+    // --- union of (no-data) custom children: composition check only ---\r
+    {\r
+        {   // operands are named objects (copied)\r
+            const auto lhs = tests::SimpleFilter{ };\r
+            const auto rhs = tests::SimpleFilter{ };\r
+            const auto combined = PbiFilter::Union({ lhs, rhs });\r
+            tests::checkFilterInternals(combined, PbiFilter::UNION, 2, std::vector<size_t>{});\r
+        }\r
+        {   // operands are temporaries\r
+            const auto combined = PbiFilter::Union({ tests::SimpleFilter{ }, tests::SimpleFilter{ } });\r
+            tests::checkFilterInternals(combined, PbiFilter::UNION, 2, std::vector<size_t>{});\r
+        }\r
+    }\r
+\r
+    // --- 2-child union: rows accepted by either child ---\r
+    {\r
+        const auto first  = tests::SortUniqueTestFilter{ };\r
+        const auto second = tests::SortUniqueTestFilter2{ };\r
+        const auto either = PbiFilter::Union({ first, second });\r
+\r
+        tests::checkFilterRows(first,  std::vector<size_t>{2, 7, 0, 3, 4, 1, 8});\r
+        tests::checkFilterRows(second, std::vector<size_t>{3, 7, 5});\r
+        tests::checkFilterRows(either, std::vector<size_t>{0, 1, 2, 3, 4, 5, 7, 8});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, IntersectOk)\r
+{\r
+    // --- intersection of empty filters ---\r
+    {\r
+        {   // operands are named objects (copied)\r
+            const auto lhs = tests::emptyFilter();\r
+            const auto rhs = tests::emptyFilter();\r
+            const auto combined = PbiFilter::Intersection({ lhs, rhs });\r
+            tests::checkFilterInternals(combined, PbiFilter::INTERSECT, 2, std::vector<size_t>{0,1,2,3});\r
+        }\r
+        {   // operands are temporaries\r
+            const auto combined = PbiFilter::Intersection({ PbiFilter{ }, PbiFilter{ } });\r
+            tests::checkFilterInternals(combined, PbiFilter::INTERSECT, 2, std::vector<size_t>{0,1,2,3});\r
+        }\r
+    }\r
+\r
+    // --- intersection of (no-data) custom children: composition check only ---\r
+    {\r
+        {   // operands are named objects (copied)\r
+            const auto lhs = tests::SimpleFilter{ };\r
+            const auto rhs = tests::SimpleFilter{ };\r
+            const auto combined = PbiFilter::Intersection({ lhs, rhs });\r
+            tests::checkFilterInternals(combined, PbiFilter::INTERSECT, 2, std::vector<size_t>{});\r
+        }\r
+        {   // operands are temporaries\r
+            const auto combined = PbiFilter::Intersection({ tests::SimpleFilter{ }, tests::SimpleFilter{ } });\r
+            tests::checkFilterInternals(combined, PbiFilter::INTERSECT, 2, std::vector<size_t>{});\r
+        }\r
+    }\r
+\r
+    // --- 2-child intersection: rows accepted by both children ---\r
+    {\r
+        const auto first  = tests::SortUniqueTestFilter{ };\r
+        const auto second = tests::SortUniqueTestFilter2{ };\r
+        const auto both   = PbiFilter::Intersection({ first, second });\r
+\r
+        tests::checkFilterRows(first,  std::vector<size_t>{2, 7, 0, 3, 4, 1, 8});\r
+        tests::checkFilterRows(second, std::vector<size_t>{3, 7, 5 });\r
+        tests::checkFilterRows(both,   std::vector<size_t>{3, 7});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, AlignedEndFilterOk)\r
+{\r
+    // shared index aEnd_: { 2531, 4055, 5560, 6237 }\r
+\r
+    {   // no Compare argument\r
+        const auto endFilter = PbiFilter{ PbiAlignedEndFilter{ 4055 } };\r
+        tests::checkFilterRows(endFilter, std::vector<size_t>{1});\r
+    }\r
+    {   // NOT_EQUAL\r
+        const auto endFilter = PbiFilter{ PbiAlignedEndFilter{ 4055, Compare::NOT_EQUAL } };\r
+        tests::checkFilterRows(endFilter, std::vector<size_t>{0,2,3});\r
+    }\r
+    {   // LESS_THAN\r
+        const auto endFilter = PbiFilter{ PbiAlignedEndFilter{ 4000, Compare::LESS_THAN } };\r
+        tests::checkFilterRows(endFilter, std::vector<size_t>{0});\r
+    }\r
+    {   // GREATER_THAN\r
+        const auto endFilter = PbiFilter{ PbiAlignedEndFilter{ 5560, Compare::GREATER_THAN } };\r
+        tests::checkFilterRows(endFilter, std::vector<size_t>{3});\r
+    }\r
+    {   // GREATER_THAN_EQUAL\r
+        const auto endFilter = PbiFilter{ PbiAlignedEndFilter{ 5560, Compare::GREATER_THAN_EQUAL } };\r
+        tests::checkFilterRows(endFilter, std::vector<size_t>{2,3});\r
+    }\r
+\r
+    {   // threshold above every aEnd_ value; empty row list, so nothing is asserted\r
+        const auto endFilter = PbiFilter{ PbiAlignedEndFilter{ 7000, Compare::GREATER_THAN } };\r
+        tests::checkFilterRows(endFilter, std::vector<size_t>{});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, AlignedLengthFilterOk)\r
+{\r
+    // shared index aEnd_ - aStart_: { 401, 1474, 1458, 618 }\r
+\r
+    {   // length >= 500\r
+        const auto lengthFilter = PbiFilter{ PbiAlignedLengthFilter{ 500, Compare::GREATER_THAN_EQUAL } };\r
+        tests::checkFilterRows(lengthFilter, std::vector<size_t>{1,2,3});\r
+    }\r
+    {   // length >= 1000\r
+        const auto lengthFilter = PbiFilter{ PbiAlignedLengthFilter{ 1000, Compare::GREATER_THAN_EQUAL } };\r
+        tests::checkFilterRows(lengthFilter, std::vector<size_t>{1,2});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, AlignedStartFilterOk)\r
+{\r
+    // shared index aStart_: { 2130, 2581, 4102, 5619 }\r
+\r
+    {   // LESS_THAN\r
+        const auto startFilter = PbiFilter{ PbiAlignedStartFilter{ 2600, Compare::LESS_THAN } };\r
+        tests::checkFilterRows(startFilter, std::vector<size_t>{0,1});\r
+    }\r
+    {   // GREATER_THAN\r
+        const auto startFilter = PbiFilter{ PbiAlignedStartFilter{ 4102, Compare::GREATER_THAN } };\r
+        tests::checkFilterRows(startFilter, std::vector<size_t>{3});\r
+    }\r
+    {   // GREATER_THAN_EQUAL\r
+        const auto startFilter = PbiFilter{ PbiAlignedStartFilter{ 4102, Compare::GREATER_THAN_EQUAL } };\r
+        tests::checkFilterRows(startFilter, std::vector<size_t>{2,3});\r
+    }\r
+    {   // threshold above every aStart_ value; empty row list, so nothing is asserted\r
+        const auto startFilter = PbiFilter{ PbiAlignedStartFilter{ 6000, Compare::GREATER_THAN } };\r
+        tests::checkFilterRows(startFilter, std::vector<size_t>{ });\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, AlignedStrandFilterOk)\r
+{\r
+    // shared index revStrand_: { 0, 1, 0, 1 }\r
+\r
+    {   // forward strand\r
+        const auto strandFilter = PbiFilter{ PbiAlignedStrandFilter{ Strand::FORWARD } };\r
+        tests::checkFilterRows(strandFilter, std::vector<size_t>{0,2});\r
+    }\r
+    {   // reverse strand\r
+        const auto strandFilter = PbiFilter{ PbiAlignedStrandFilter{ Strand::REVERSE } };\r
+        tests::checkFilterRows(strandFilter, std::vector<size_t>{1,3});\r
+    }\r
+    {   // "not forward" - same as Strand::REVERSE\r
+        const auto strandFilter = PbiFilter{ PbiAlignedStrandFilter{ Strand::FORWARD, Compare::NOT_EQUAL } };\r
+        tests::checkFilterRows(strandFilter, std::vector<size_t>{1,3});\r
+    }\r
+\r
+    // ordering comparisons are not supported for strands: construction throws\r
+    EXPECT_THROW(PbiAlignedStrandFilter(Strand::FORWARD, Compare::LESS_THAN),          std::runtime_error);\r
+    EXPECT_THROW(PbiAlignedStrandFilter(Strand::FORWARD, Compare::LESS_THAN_EQUAL),    std::runtime_error);\r
+    EXPECT_THROW(PbiAlignedStrandFilter(Strand::FORWARD, Compare::GREATER_THAN),       std::runtime_error);\r
+    EXPECT_THROW(PbiAlignedStrandFilter(Strand::FORWARD, Compare::GREATER_THAN_EQUAL), std::runtime_error);\r
+}\r
+\r
+TEST(PbiFilterTest, BarcodeFilterOk)\r
+{\r
+    // shared index bcForward_: { 0, 17, 256, 17 }, bcReverse_: { 1, 18, 257, 18 }\r
+\r
+    {   // 17 appears as a forward barcode on rows 1 and 3\r
+        const auto barcodeFilter = PbiFilter{ PbiBarcodeFilter{ 17 } };\r
+        tests::checkFilterRows(barcodeFilter, std::vector<size_t>{1,3});\r
+    }\r
+    {   // 18 appears as a reverse barcode on rows 1 and 3\r
+        const auto barcodeFilter = PbiFilter{ PbiBarcodeFilter{ 18 } };\r
+        tests::checkFilterRows(barcodeFilter, std::vector<size_t>{1,3});\r
+    }\r
+    {   // 0 appears as a forward barcode on row 0\r
+        const auto barcodeFilter = PbiFilter{ PbiBarcodeFilter{ 0 } };\r
+        tests::checkFilterRows(barcodeFilter, std::vector<size_t>{0});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, BarcodeForwardFilterOk)\r
+{\r
+    // shared index bcForward_: { 0, 17, 256, 17 }\r
+\r
+    {   // single value present in the index\r
+        const auto forwardFilter = PbiFilter{ PbiBarcodeForwardFilter{ 17 } };\r
+        tests::checkFilterRows(forwardFilter, std::vector<size_t>{1,3});\r
+    }\r
+    {   // value absent from the index; empty row list, so nothing is asserted\r
+        const auto forwardFilter = PbiFilter{ PbiBarcodeForwardFilter{ 400 } };\r
+        tests::checkFilterRows(forwardFilter, std::vector<size_t>{});\r
+    }\r
+    {   // list of values\r
+        const auto forwardFilter = PbiFilter{ PbiBarcodeForwardFilter{ {0, 256} } };\r
+        tests::checkFilterRows(forwardFilter, std::vector<size_t>{0,2});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, BarcodeQualityFilterOk)\r
+{\r
+    // shared index bcQual_: { 42, 80, 42, 110 }\r
+\r
+    {   // quality >= 80\r
+        const auto qualityFilter = PbiFilter{ PbiBarcodeQualityFilter{ 80, Compare::GREATER_THAN_EQUAL } };\r
+        tests::checkFilterRows(qualityFilter, std::vector<size_t>{1,3});\r
+    }\r
+    {   // quality < 40; empty row list, so nothing is asserted\r
+        const auto qualityFilter = PbiFilter{ PbiBarcodeQualityFilter{ 40, Compare::LESS_THAN } };\r
+        tests::checkFilterRows(qualityFilter, std::vector<size_t>{});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, BarcodeReverseFilterOk)\r
+{\r
+    // shared index bcReverse_: { 1, 18, 257, 18 }\r
+\r
+    {   // single value present in the index\r
+        const auto reverseFilter = PbiFilter{ PbiBarcodeReverseFilter{ 18 } };\r
+        tests::checkFilterRows(reverseFilter, std::vector<size_t>{1,3});\r
+    }\r
+    {   // value absent from the index; empty row list, so nothing is asserted\r
+        const auto reverseFilter = PbiFilter{ PbiBarcodeReverseFilter{ 400 } };\r
+        tests::checkFilterRows(reverseFilter, std::vector<size_t>{ });\r
+    }\r
+    {   // list of values\r
+        const auto reverseFilter = PbiFilter{ PbiBarcodeReverseFilter{ {1, 257} } };\r
+        tests::checkFilterRows(reverseFilter, std::vector<size_t>{0,2});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, BarcodesFilterOk)\r
+{\r
+    // shared index (bcForward_, bcReverse_) pairs: (0,1) (17,18) (256,257) (17,18)\r
+\r
+    {   // forward & reverse given as two arguments\r
+        const auto pairFilter = PbiFilter{ PbiBarcodesFilter{ 17, 18 } };\r
+        tests::checkFilterRows(pairFilter, std::vector<size_t>{1,3});\r
+    }\r
+    {   // pair not present in the index; empty row list, so nothing is asserted\r
+        const auto pairFilter = PbiFilter{ PbiBarcodesFilter{ 17, 19 } };\r
+        tests::checkFilterRows(pairFilter, std::vector<size_t>{ });\r
+    }\r
+    {   // forward & reverse given as a std::pair\r
+        const auto pairFilter = PbiFilter{ PbiBarcodesFilter{ std::make_pair(17,18) } };\r
+        tests::checkFilterRows(pairFilter, std::vector<size_t>{1,3});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, IdentityFilterOk)\r
+{\r
+    {   // identity >= 0.95 is expected to accept row 3\r
+        const auto identityFilter = PbiFilter{ PbiIdentityFilter{ 0.95, Compare::GREATER_THAN_EQUAL } };\r
+        tests::checkFilterRows(identityFilter, std::vector<size_t>{3});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, LocalContextFilterOk)\r
+{\r
+    // shared index ctxtFlag_: { 0, 1, 2, 3 }\r
+\r
+    {   // == NO_LOCAL_CONTEXT\r
+        const auto contextFilter = PbiFilter { PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT } };\r
+        tests::checkFilterRows(contextFilter, std::vector<size_t>{0});\r
+    }\r
+    {   // != ADAPTER_BEFORE (exact match)\r
+        const auto contextFilter = PbiFilter { PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_EQUAL } };\r
+        tests::checkFilterRows(contextFilter, std::vector<size_t>{0,2,3});\r
+    }\r
+    {   // contains ADAPTER_BEFORE\r
+        const auto contextFilter = PbiFilter { PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS } };\r
+        tests::checkFilterRows(contextFilter, std::vector<size_t>{1,3});\r
+    }\r
+    {   // does not contain ADAPTER_BEFORE\r
+        const auto contextFilter = PbiFilter { PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS } };\r
+        tests::checkFilterRows(contextFilter, std::vector<size_t>{0,2});\r
+    }\r
+    {   // include both ADAPTER_BEFORE and ADAPTER_AFTER\r
+        const auto beforeAndAfter = PbiFilter::Intersection(\r
+        {\r
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },\r
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER,  Compare::CONTAINS }\r
+        });\r
+        tests::checkFilterRows(beforeAndAfter, std::vector<size_t>{3});\r
+    }\r
+    {   // exclude both ADAPTER_BEFORE and ADAPTER_AFTER\r
+        const auto neitherAdapter = PbiFilter::Intersection(\r
+        {\r
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS },\r
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER,  Compare::NOT_CONTAINS }\r
+        });\r
+        tests::checkFilterRows(neitherAdapter, std::vector<size_t>{0});\r
+    }\r
+    {   // include everything with either ADAPTER_BEFORE or ADAPTER_AFTER\r
+        const auto eitherAdapter = PbiFilter::Union(\r
+        {\r
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },\r
+            PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER,  Compare::CONTAINS }\r
+        });\r
+        tests::checkFilterRows(eitherAdapter, std::vector<size_t>{1,2,3});\r
+    }\r
+    {   // include everything with either ADAPTER_BEFORE or ADAPTER_AFTER, but not both:\r
+        // "has some context" AND ("lacks BEFORE" OR "lacks AFTER")\r
+        const auto exactlyOneAdapter = PbiFilter::Intersection(\r
+        {\r
+            PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT, Compare::NOT_EQUAL },\r
+            PbiFilter::Union(\r
+            {\r
+                PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS },\r
+                PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER,  Compare::NOT_CONTAINS }\r
+            })\r
+        });\r
+        tests::checkFilterRows(exactlyOneAdapter, std::vector<size_t>{1,2});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, MapQualityFilterOk)\r
+{\r
+    // shared index mapQV_: { 254, 254, 254, 254 }\r
+\r
+    {   // no Compare argument: every row expected\r
+        const auto mapQvFilter = PbiFilter{ PbiMapQualityFilter{ 254 } };\r
+        tests::checkFilterRows(mapQvFilter, std::vector<size_t>{0,1,2,3});\r
+    }\r
+    {   // NOT_EQUAL; empty row list, so nothing is asserted\r
+        const auto mapQvFilter = PbiFilter{ PbiMapQualityFilter{ 254, Compare::NOT_EQUAL } };\r
+        tests::checkFilterRows(mapQvFilter, std::vector<size_t>{});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, MovieNameFilterOk)\r
+{\r
+    // This test runs against the PBI index loaded from the test2.bam data file\r
+    // rather than the in-memory shared index.\r
+    const auto bamFile = BamFile{ tests::Data_Dir + string{ "/group/test2.bam" } };\r
+    const auto index = PbiRawData{ bamFile.PacBioIndexFilename() };\r
+\r
+    // expects `f` to accept each listed row of the loaded index\r
+    const auto expectAccepted = [&index](const PbiFilter& f, const std::vector<size_t>& rows)\r
+    {\r
+        for (const size_t row : rows)\r
+            EXPECT_TRUE(f.Accepts(index, row));\r
+    };\r
+\r
+    {   // known movie name\r
+        const auto movieFilter = PbiFilter{ PbiMovieNameFilter{ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0" } };\r
+        const auto expectedRows = std::vector<size_t>{0,1,2,3};\r
+        expectAccepted(movieFilter, expectedRows);\r
+    }\r
+    {   // unknown movie name; empty row list, so nothing is asserted\r
+        const auto movieFilter = PbiFilter{ PbiMovieNameFilter{ "does_not_exist" } };\r
+        const auto expectedRows = std::vector<size_t>{};\r
+        expectAccepted(movieFilter, expectedRows);\r
+    }\r
+    {   // list containing one unknown and one known name\r
+        const auto movieNames = vector<string>{"does_not_exist",\r
+                                               "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0"};\r
+        const auto movieFilter = PbiFilter{ PbiMovieNameFilter{ movieNames } };\r
+        const auto expectedRows = std::vector<size_t>{0,1,2,3};\r
+        expectAccepted(movieFilter, expectedRows);\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, NumDeletedBasesFilterOk)\r
+{\r
+    // del: { 12, 38, 45, 11 } - calculated from raw data, not stored directly in testing object or read from PBI file\r
+\r
+    {   // deletions <= 12\r
+        const auto deletionFilter = PbiFilter{ PbiNumDeletedBasesFilter{ 12, Compare::LESS_THAN_EQUAL } };\r
+        tests::checkFilterRows(deletionFilter, std::vector<size_t>{0,3});\r
+    }\r
+    {   // deletions == 45\r
+        const auto deletionFilter = PbiFilter{ PbiNumDeletedBasesFilter{ 45, Compare::EQUAL } };\r
+        tests::checkFilterRows(deletionFilter, std::vector<size_t>{2});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, NumInsertedBasesFilterOk)\r
+{\r
+    // ins: { 17, 63, 65, 20 } - calculated from raw data, not stored directly in testing object or read from PBI file\r
+\r
+    {   // insertions >= 63\r
+        const auto insertionFilter = PbiFilter{ PbiNumInsertedBasesFilter{ 63, Compare::GREATER_THAN_EQUAL } };\r
+        tests::checkFilterRows(insertionFilter, std::vector<size_t>{1,2});\r
+    }\r
+    {   // insertions != 17\r
+        const auto insertionFilter = PbiFilter{ PbiNumInsertedBasesFilter{ 17, Compare::NOT_EQUAL } };\r
+        tests::checkFilterRows(insertionFilter, std::vector<size_t>{1,2,3});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, NumMatchesFilterOk)\r
+{\r
+    // shared index nM_: { 384, 1411, 1393, 598 }\r
+\r
+    {   // matches >= 1000\r
+        const auto matchFilter = PbiFilter{ PbiNumMatchesFilter{ 1000, Compare::GREATER_THAN_EQUAL } };\r
+        tests::checkFilterRows(matchFilter, std::vector<size_t>{1,2});\r
+    }\r
+    {   // matches < 400\r
+        const auto matchFilter = PbiFilter{ PbiNumMatchesFilter{ 400, Compare::LESS_THAN } };\r
+        tests::checkFilterRows(matchFilter, std::vector<size_t>{0});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, NumMismatchesFilterOk)\r
+{\r
+    // shared index nMM_: { 0, 0, 0, 0 }\r
+\r
+    {   // mismatches == 0: every row expected\r
+        const auto mismatchFilter = PbiFilter{ PbiNumMismatchesFilter{ 0, Compare::EQUAL } };\r
+        tests::checkFilterRows(mismatchFilter, std::vector<size_t>{0,1,2,3});\r
+    }\r
+    {   // mismatches != 0; empty row list, so nothing is asserted\r
+        const auto mismatchFilter = PbiFilter{ PbiNumMismatchesFilter{ 0, Compare::NOT_EQUAL } };\r
+        tests::checkFilterRows(mismatchFilter, std::vector<size_t>{});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, QueryEndFilterOk)\r
+{\r
+    // shared index qEnd_: { 2531, 4055, 5571, 6237 }\r
+\r
+    {   // no Compare argument\r
+        const auto queryEndFilter = PbiFilter{ PbiQueryEndFilter{ 4055 } };\r
+        tests::checkFilterRows(queryEndFilter, std::vector<size_t>{1});\r
+    }\r
+    {   // qEnd >= 6200\r
+        const auto queryEndFilter = PbiFilter{ PbiQueryEndFilter{ 6200, Compare::GREATER_THAN_EQUAL } };\r
+        tests::checkFilterRows(queryEndFilter, std::vector<size_t>{3});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, QueryLengthFilterOk)\r
+{\r
+    // shared index qEnd_ - qStart_: { 417, 1476, 1470, 622 }\r
+\r
+    {   // length >= 500\r
+        const auto queryLengthFilter = PbiFilter{ PbiQueryLengthFilter{ 500, Compare::GREATER_THAN_EQUAL } };\r
+        tests::checkFilterRows(queryLengthFilter, std::vector<size_t>{1,2,3});\r
+    }\r
+    {   // length >= 1000\r
+        const auto queryLengthFilter = PbiFilter{ PbiQueryLengthFilter{ 1000, Compare::GREATER_THAN_EQUAL } };\r
+        tests::checkFilterRows(queryLengthFilter, std::vector<size_t>{1,2});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, QueryNameFilterOk)\r
+{\r
+    // NOTE(review): `index` is loaded from disk but never passed to a filter;\r
+    // every check below goes through checkFilterRows(), i.e. the shared index.\r
+    const auto bamFile = BamFile{ tests::Data_Dir + string{ "/group/test2.bam" } };\r
+    const auto index = PbiIndex{ bamFile.PacBioIndexFilename() };\r
+\r
+    {   // single name: hole 14743, query interval 2579_4055\r
+        const auto nameFilter = PbiFilter{ PbiQueryNameFilter{ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/2579_4055" } };\r
+        tests::checkFilterRows(nameFilter, std::vector<size_t>{1});\r
+    }\r
+    {   // single name: hole 14743, query interval 5615_6237\r
+        const auto nameFilter = PbiFilter{ PbiQueryNameFilter{ "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/5615_6237" } };\r
+        tests::checkFilterRows(nameFilter, std::vector<size_t>{3});\r
+    }\r
+\r
+    {   // well-formed but unknown name; empty row list, so nothing is asserted\r
+        const auto nameFilter = PbiFilter{ PbiQueryNameFilter{ "does_not_exist/0/0_0" } };\r
+        tests::checkFilterRows(nameFilter, std::vector<size_t>{});\r
+    }\r
+    {   // list of names\r
+        const auto queryNames = vector<string>{"m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/2579_4055",\r
+                                               "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/5615_6237"};\r
+        const auto nameFilter = PbiFilter{ PbiQueryNameFilter{ queryNames } };\r
+        tests::checkFilterRows(nameFilter, std::vector<size_t>{1,3});\r
+    }\r
+\r
+    // invalid QNAME syntax throws\r
+    EXPECT_THROW(\r
+    {\r
+        const auto badNameFilter = PbiFilter{ PbiQueryNameFilter{ "" } };\r
+        tests::checkFilterRows(badNameFilter, std::vector<size_t>{});\r
+    },\r
+    std::runtime_error);\r
+    EXPECT_THROW(\r
+    {\r
+        const auto badNameFilter = PbiFilter{ PbiQueryNameFilter{ "foo" } };\r
+        tests::checkFilterRows(badNameFilter, std::vector<size_t>{});\r
+    },\r
+    std::runtime_error);\r
+    EXPECT_THROW(\r
+    {\r
+        const auto badNameFilter = PbiFilter{ PbiQueryNameFilter{ "foo/bar" } };\r
+        tests::checkFilterRows(badNameFilter, std::vector<size_t>{});\r
+    },\r
+    std::runtime_error);\r
+    EXPECT_THROW(\r
+    {\r
+        const auto badNameFilter = PbiFilter{ PbiQueryNameFilter{ "foo/bar/baz_bam" } };\r
+        tests::checkFilterRows(badNameFilter, std::vector<size_t>{});\r
+    },\r
+    std::exception); // come back to see why this is not runtime_error but something else\r
+}\r
+\r
+TEST(PbiFilterTest, QueryStartFilterOk)\r
+{\r
+    // shared index qStart_: { 2114, 2579, 4101, 5615 }\r
+\r
+    {   // value present in the index\r
+        const auto queryStartFilter = PbiFilter{ PbiQueryStartFilter{ 4101 } };\r
+        tests::checkFilterRows(queryStartFilter, std::vector<size_t>{2});\r
+    }\r
+    {   // value absent from the index; empty row list, so nothing is asserted\r
+        const auto queryStartFilter = PbiFilter{ PbiQueryStartFilter{ 5000 } };\r
+        tests::checkFilterRows(queryStartFilter, std::vector<size_t>{});\r
+    }\r
+    {   // qStart > 5000\r
+        const auto queryStartFilter = PbiFilter{ PbiQueryStartFilter{ 5000, Compare::GREATER_THAN } };\r
+        tests::checkFilterRows(queryStartFilter, std::vector<size_t>{3});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, ReadAccuracyFilterOk)\r
+{\r
+    // shared index readQual_: { 0.901, 0.601, 0.901, 0.601 }\r
+\r
+    {   // no Compare argument; empty row list, so nothing is asserted\r
+        const auto accuracyFilter = PbiFilter{ PbiReadAccuracyFilter{ 0.9 } };\r
+        tests::checkFilterRows(accuracyFilter, std::vector<size_t>{});\r
+    }\r
+    {   // accuracy > 0.9\r
+        const auto accuracyFilter = PbiFilter{ PbiReadAccuracyFilter{ 0.9, Compare::GREATER_THAN } };\r
+        tests::checkFilterRows(accuracyFilter, std::vector<size_t>{0,2});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, ReadGroupFilterOk)\r
+{\r
+    // shared index rgId_: -1197849594 for every row.\r
+    // The PbiReadGroupFilter objects are handed to checkFilterRows() directly\r
+    // (no explicit PbiFilter wrapper).\r
+\r
+    {   // numeric ID\r
+        const auto knownId = PbiReadGroupFilter{ -1197849594 };\r
+        tests::checkFilterRows(knownId, std::vector<size_t>{0,1,2,3});\r
+\r
+        // ID absent from the index; empty row list, so nothing is asserted\r
+        const auto unknownId = PbiReadGroupFilter{ 200 };\r
+        tests::checkFilterRows(unknownId, std::vector<size_t>{});\r
+    }\r
+    {   // string ID\r
+        const auto byString = PbiReadGroupFilter{ "b89a4406" };\r
+        tests::checkFilterRows(byString, std::vector<size_t>{0,1,2,3});\r
+\r
+        // same ID string again\r
+        const auto byStringAgain = PbiReadGroupFilter{ "b89a4406" };\r
+        tests::checkFilterRows(byStringAgain, std::vector<size_t>{0,1,2,3});\r
+    }\r
+    {   // ReadGroupInfo object\r
+        const auto readGroup = ReadGroupInfo{ "b89a4406" };\r
+        const auto byInfo = PbiReadGroupFilter{ readGroup };\r
+        tests::checkFilterRows(byInfo, std::vector<size_t>{0,1,2,3});\r
+    }\r
+    {   // multi-ID\r
+        const auto numericIds = vector<int32_t>({-1197849594, 200});\r
+        const auto byIds = PbiReadGroupFilter{ numericIds };\r
+        tests::checkFilterRows(byIds, std::vector<size_t>{0,1,2,3});\r
+    }\r
+    {   // multi-string\r
+        const auto stringIds = vector<string>({"b89a4406", "deadbeef"});\r
+        const auto byStrings = PbiReadGroupFilter{ stringIds };\r
+        tests::checkFilterRows(byStrings, std::vector<size_t>{0,1,2,3});\r
+    }\r
+    {   // multi-ReadGroupInfo\r
+        const auto readGroups = vector<ReadGroupInfo>({ ReadGroupInfo("b89a4406"), ReadGroupInfo("deadbeef")});\r
+        const auto byInfos = PbiReadGroupFilter{ readGroups };\r
+        tests::checkFilterRows(byInfos, std::vector<size_t>{0,1,2,3});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, ReferenceEndFilterOk)\r
+{\r
+    // shared index tEnd_: { 9903, 9902, 9893, 9900 }\r
+\r
+    {   // no Compare argument\r
+        const auto refEndFilter = PbiFilter{ PbiReferenceEndFilter{ 9900 } };\r
+        tests::checkFilterRows(refEndFilter, std::vector<size_t>{3});\r
+    }\r
+    {   // tEnd >= 9900\r
+        const auto refEndFilter = PbiFilter{ PbiReferenceEndFilter{ 9900, Compare::GREATER_THAN_EQUAL } };\r
+        tests::checkFilterRows(refEndFilter, std::vector<size_t>{0,1,3});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, ReferenceIdFilterOk)\r
+{\r
+    // shared index tId_: { 0, 0, 0, 0 }\r
+\r
+    {   // no Compare argument: every row expected\r
+        const auto refIdFilter = PbiFilter{ PbiReferenceIdFilter{ 0 } };\r
+        tests::checkFilterRows(refIdFilter, std::vector<size_t>{0,1,2,3});\r
+    }\r
+    {   // NOT_EQUAL; empty row list, so nothing is asserted\r
+        const auto refIdFilter = PbiFilter{ PbiReferenceIdFilter{ 0, Compare::NOT_EQUAL } };\r
+        tests::checkFilterRows(refIdFilter, std::vector<size_t>{});\r
+    }\r
+    {   // list containing one present (0) and one absent (42) ID\r
+        const auto refIds = vector<int32_t>({0, 42});\r
+        const auto refIdFilter = PbiFilter{ PbiReferenceIdFilter{ refIds } };\r
+        tests::checkFilterRows(refIdFilter, std::vector<size_t>{0,1,2,3});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, ReferenceNameFilterOk)\r
+{\r
+    // This test runs against the PBI index loaded from the test2.bam data file\r
+    // rather than the in-memory shared index.\r
+    const auto bamFile = BamFile{ tests::Data_Dir + string{ "/group/test2.bam" } };\r
+    const auto index = PbiRawData{ bamFile.PacBioIndexFilename() };\r
+\r
+    // expects `f` to accept each listed row of the loaded index\r
+    const auto expectAccepted = [&index](const PbiFilter& f, const std::vector<size_t>& rows)\r
+    {\r
+        for (const size_t row : rows)\r
+            EXPECT_TRUE(f.Accepts(index, row));\r
+    };\r
+\r
+    {   // known reference name\r
+        const auto refNameFilter = PbiFilter{ PbiReferenceNameFilter{ "lambda_NEB3011" } };\r
+        const auto expectedRows = std::vector<size_t>{0,1,2,3};\r
+        expectAccepted(refNameFilter, expectedRows);\r
+    }\r
+    {   // NOT_EQUAL; empty row list, so nothing is asserted\r
+        const auto refNameFilter = PbiFilter{ PbiReferenceNameFilter{ "lambda_NEB3011", Compare::NOT_EQUAL } };\r
+        const auto expectedRows = std::vector<size_t>{};\r
+        expectAccepted(refNameFilter, expectedRows);\r
+    }\r
+    {   // list of names\r
+        const auto refNames = vector<string>({ "lambda_NEB3011" }); // this file only has 1 :(\r
+        const auto refNameFilter = PbiFilter{ PbiReferenceNameFilter{ refNames } };\r
+        const auto expectedRows = std::vector<size_t>{0,1,2,3};\r
+        expectAccepted(refNameFilter, expectedRows);\r
+    }\r
+\r
+    // unsupported compare types throw\r
+    EXPECT_THROW(PbiReferenceNameFilter("foo", Compare::LESS_THAN),          std::runtime_error);\r
+    EXPECT_THROW(PbiReferenceNameFilter("foo", Compare::LESS_THAN_EQUAL),    std::runtime_error);\r
+    EXPECT_THROW(PbiReferenceNameFilter("foo", Compare::GREATER_THAN),       std::runtime_error);\r
+    EXPECT_THROW(PbiReferenceNameFilter("foo", Compare::GREATER_THAN_EQUAL), std::runtime_error);\r
+}\r
+\r
+TEST(PbiFilterTest, ReferenceStartFilterOk)\r
+{\r
+    // shared index tStart_: { 9507, 8453, 8455, 9291 }\r
+\r
+    {   // no Compare argument\r
+        const auto refStartFilter = PbiFilter{ PbiReferenceStartFilter{ 8453 } };\r
+        tests::checkFilterRows(refStartFilter, std::vector<size_t>{1});\r
+    }\r
+    {   // tStart >= 9200\r
+        const auto refStartFilter = PbiFilter{ PbiReferenceStartFilter{ 9200, Compare::GREATER_THAN_EQUAL } };\r
+        tests::checkFilterRows(refStartFilter, std::vector<size_t>{0,3});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, ZmwFilterOk)\r
+{\r
+    // shared index holeNumber_: { 14743, 14743, 14743, 14743 }\r
+\r
+    {   // no Compare argument: every row expected\r
+        const auto zmwFilter = PbiFilter{ PbiZmwFilter{ 14743 } };\r
+        tests::checkFilterRows(zmwFilter, std::vector<size_t>{0,1,2,3});\r
+    }\r
+    {   // NOT_EQUAL; empty row list, so nothing is asserted\r
+        const auto zmwFilter = PbiFilter{ PbiZmwFilter{ 14743, Compare::NOT_EQUAL } };\r
+        tests::checkFilterRows(zmwFilter, std::vector<size_t>{});\r
+    }\r
+    {   // list containing the present hole number plus two absent ones\r
+        const auto holeNumbers = vector<int32_t>({14743,42,200});\r
+        const auto zmwFilter = PbiFilter{ PbiZmwFilter{ holeNumbers } };\r
+        tests::checkFilterRows(zmwFilter, std::vector<size_t>{0,1,2,3});\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, FromDataSetOk)\r
+{\r
+    // Hand-built reference filter: Union of\r
+    //   - Intersection(zmw 14743, read accuracy >= 0.9)\r
+    //   - reference start >= 9200\r
+    const auto handBuilt =\r
+        PbiFilter::Union(\r
+        {\r
+            PbiFilter::Intersection(\r
+            {\r
+                PbiZmwFilter{ 14743 },\r
+                PbiReadAccuracyFilter { 0.9, Compare::GREATER_THAN_EQUAL }\r
+            }),\r
+\r
+            PbiReferenceStartFilter { 9200, Compare::GREATER_THAN_EQUAL }\r
+        });\r
+\r
+    // first dataset filter: "zm" and "rq" properties\r
+    auto zmwAndAccuracy = Properties{ };\r
+    zmwAndAccuracy.Add(Property{ "zm", "14743", "==" });\r
+    zmwAndAccuracy.Add(Property{ "rq", "0.9", ">=" });\r
+\r
+    auto firstFilter = Filter{ };\r
+    firstFilter.Properties(zmwAndAccuracy);\r
+\r
+    // second dataset filter: "pos" property\r
+    auto refStart = Properties{ };\r
+    refStart.Add(Property{ "pos", "9200", ">=" });\r
+\r
+    auto secondFilter = Filter{ };\r
+    secondFilter.Properties(refStart);\r
+\r
+    // dataset carrying both filters\r
+    auto allFilters = Filters{ };\r
+    allFilters.Add(firstFilter);\r
+    allFilters.Add(secondFilter);\r
+    auto ds = DataSet{ };\r
+    ds.Filters(allFilters);\r
+\r
+    const auto fromDataSet = PbiFilter::FromDataSet(ds);\r
+\r
+    // both filters must agree (accept or reject) on every row of the shared index\r
+    for (size_t row = 0; row < tests::shared_index.NumReads(); ++row) {\r
+        EXPECT_EQ(handBuilt.Accepts(tests::shared_index, row),\r
+                  fromDataSet.Accepts(tests::shared_index, row));\r
+    }\r
+}\r
+\r
+TEST(PbiFilterTest, BarcodeListFromDataSetXmlOk)\r
+{\r
+    // Wraps `prop` in a one-filter DataSet, builds a PbiFilter from it, and\r
+    // expects both that filter and the hand-built `reference` filter to accept\r
+    // the rows in `rows`.\r
+    auto runner = [](const Property& prop,\r
+                     const PbiFilter& reference,\r
+                     const std::vector<size_t>& rows)\r
+    {\r
+        auto xmlFilter = Filter{ };\r
+        xmlFilter.Properties().Add(prop);\r
+        DataSet ds = DataSet{ };\r
+        ds.Filters().Add(xmlFilter);\r
+\r
+        const auto fromDataSet = PbiFilter::FromDataSet(ds);\r
+        tests::checkFilterRows(reference,   rows);\r
+        tests::checkFilterRows(fromDataSet, rows);\r
+    };\r
+\r
+    // single barcode\r
+    runner(Property{ "bc", "18", "==" },\r
+           PbiBarcodeFilter{ 18, Compare::EQUAL },\r
+           std::vector<size_t>{1,3});\r
+\r
+    // single barcode (bracketed)\r
+    runner(Property{ "bc", "[18]", "==" },\r
+           PbiBarcodeFilter{ 18, Compare::EQUAL },\r
+           std::vector<size_t>{1,3});\r
+\r
+    // barcode pair (square brackets)\r
+    runner(Property{ "bc", "[17,18]", "==" },\r
+           PbiBarcodesFilter{ {17, 18}, Compare::EQUAL },\r
+           std::vector<size_t>{1,3});\r
+\r
+    // barcode pair (parens)\r
+    runner(Property{ "bc", "(17,18)", "==" },\r
+           PbiBarcodesFilter{ {17, 18}, Compare::EQUAL },\r
+           std::vector<size_t>{1,3});\r
+\r
+    // barcode pair (curly brackets)\r
+    runner(Property{ "bc", "{17,18}", "==" },\r
+           PbiBarcodesFilter{ {17, 18}, Compare::EQUAL },\r
+           std::vector<size_t>{1,3});\r
+\r
+    // barcode pair (list, but no brackets)\r
+    runner(Property{ "bc", "17,18", "==" },\r
+           PbiBarcodesFilter{ {17, 18}, Compare::EQUAL },\r
+           std::vector<size_t>{1,3});\r
+\r
+    // barcode pair - same value\r
+    runner(Property{ "bc", "[18,18]", "==" },\r
+           PbiBarcodesFilter{ {18, 18}, Compare::EQUAL },\r
+           std::vector<size_t>{}); // none share forward & reverse\r
+\r
+    // Wraps `prop` in a one-filter DataSet and expects filter generation to\r
+    // throw std::runtime_error.\r
+    auto expectFail = [](const Property& prop)\r
+    {\r
+        auto xmlFilter = Filter{ };\r
+        xmlFilter.Properties().Add(prop);\r
+        DataSet ds = DataSet{ };\r
+        ds.Filters().Add(xmlFilter);\r
+\r
+        EXPECT_THROW(PbiFilter::FromDataSet(ds), std::runtime_error);\r
+    };\r
+\r
+    // list-ish, but only one value\r
+    expectFail(Property{ "bc", "[18,]", "==" });\r
+\r
+    // too many barcodes\r
+    expectFail(Property{ "bc", "[18,18,18]", "==" });\r
+}\r
+\r
+TEST(PbiFilterTest, LocalContextFiltersFromDataSetXmlOk)\r
+{\r
+    // Verifies that "cx" (local context) <Property> elements in DataSet XML are\r
+    // turned into the expected PbiFilter. In the cases below, several properties\r
+    // in one <Filter> are expected to act as an intersection, and several\r
+    // <Filter> elements as a union.\r
+\r
+    // The properties of one XML <Filter> element.\r
+    using PropertyList = std::vector<Property>;\r
+\r
+    // Appends one <Filter> to 'dataset' for every property list in 'filterSpecs'.\r
+    const auto addFilters = [](DataSet& dataset,\r
+                               const std::vector<PropertyList>& filterSpecs)\r
+    {\r
+        for (const auto& properties : filterSpecs) {\r
+            auto filter = Filter{ };\r
+            for (const auto& property : properties)\r
+                filter.Properties().Add(property);\r
+            dataset.Filters().Add(filter);\r
+        }\r
+    };\r
+\r
+    // Generates a PbiFilter from a DataSet built from 'filterSpecs' and checks\r
+    // that it and the hand-built 'expectedFilter' both select 'expectedResults'.\r
+    // 'label' is attached to failure output so the failing case is identifiable.\r
+    const auto runner = [&addFilters](const char* label,\r
+                                      const std::vector<PropertyList>& filterSpecs,\r
+                                      const PbiFilter& expectedFilter,\r
+                                      const std::vector<size_t>& expectedResults)\r
+    {\r
+        SCOPED_TRACE(label);\r
+\r
+        DataSet dataset = DataSet{ };\r
+        addFilters(dataset, filterSpecs);\r
+\r
+        const auto generatedFilter = PbiFilter::FromDataSet(dataset);\r
+        tests::checkFilterRows(expectedFilter, expectedResults);\r
+        tests::checkFilterRows(generatedFilter, expectedResults);\r
+    };\r
+\r
+    // XML: <Property Name="cx" Value="0" Operator="==" />\r
+    runner("no adapters or barcodes",\r
+           { PropertyList{ Property{ "cx", "0", "==" } } },\r
+           PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT, Compare::EQUAL },\r
+           std::vector<size_t>{0});\r
+\r
+    // XML: <Property Name="cx" Value="0" Operator="!=" />\r
+    runner("any adapters or barcodes",\r
+           { PropertyList{ Property{ "cx", "0", "!=" } } },\r
+           PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT, Compare::NOT_EQUAL },\r
+           std::vector<size_t>{1,2,3});\r
+\r
+    // XML: <Property Name="cx" Value="1" Operator="&" />\r
+    runner("contains adapter_before (numeric value)",\r
+           { PropertyList{ Property{ "cx", "1", "&" } } },\r
+           PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },\r
+           std::vector<size_t>{1,3});\r
+\r
+    // XML: <Property Name="cx" Value="ADAPTER_BEFORE" Operator="&" />\r
+    runner("contains adapter_before (enum name)",\r
+           { PropertyList{ Property{ "cx", "ADAPTER_BEFORE", "&" } } },\r
+           PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },\r
+           std::vector<size_t>{1,3});\r
+\r
+    // XML: <Property Name="cx" Value="2" Operator="&" />\r
+    runner("contains adapter_after",\r
+           { PropertyList{ Property{ "cx", "2", "&" } } },\r
+           PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::CONTAINS },\r
+           std::vector<size_t>{2,3});\r
+\r
+    // XML: <Property Name="cx" Value="3" Operator="&" />\r
+    runner("contains adapter_before or adapter_after (numeric value)",\r
+           { PropertyList{ Property{ "cx", "3", "&" } } },\r
+           PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE | LocalContextFlags::ADAPTER_AFTER,\r
+                                  Compare::CONTAINS },\r
+           std::vector<size_t>{1,2,3});\r
+\r
+    // XML: <Property Name="cx" Value="ADAPTER_BEFORE | ADAPTER_AFTER" Operator="&" />\r
+    runner("contains adapter_before or adapter_after (enum names)",\r
+           { PropertyList{ Property{ "cx", "ADAPTER_BEFORE | ADAPTER_AFTER", "&" } } },\r
+           PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE | LocalContextFlags::ADAPTER_AFTER,\r
+                                  Compare::CONTAINS },\r
+           std::vector<size_t>{1,2,3});\r
+\r
+    // XML: <Property Name="cx" Value="ADAPTER_BEFORE|ADAPTER_AFTER" Operator="&" />\r
+    runner("contains adapter_before or adapter_after - no whitespace separation",\r
+           { PropertyList{ Property{ "cx", "ADAPTER_BEFORE|ADAPTER_AFTER", "&" } } },\r
+           PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE | LocalContextFlags::ADAPTER_AFTER,\r
+                                  Compare::CONTAINS },\r
+           std::vector<size_t>{1,2,3});\r
+\r
+    // XML: <Property Name="cx" Value="ADAPTER_BEFORE        |           ADAPTER_AFTER" Operator="&" />\r
+    // The value must really contain runs of blanks; with single spaces this\r
+    // case would merely repeat the "enum names" case above.\r
+    runner("contains adapter_before or adapter_after - a lot of whitespace separation",\r
+           { PropertyList{ Property{ "cx", "ADAPTER_BEFORE        |           ADAPTER_AFTER", "&" } } },\r
+           PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE | LocalContextFlags::ADAPTER_AFTER,\r
+                                  Compare::CONTAINS },\r
+           std::vector<size_t>{1,2,3});\r
+\r
+    // XML:\r
+    // <Filters>\r
+    //   <Filter>\r
+    //     <Properties>\r
+    //       <Property Name="cx" Value="0" Operator="!=" />\r
+    //       <Property Name="cx" Value="1" Operator="~" />\r
+    //     </Properties>\r
+    //   </Filter>\r
+    //   <Filter>\r
+    //     <Properties>\r
+    //       <Property Name="cx" Value="0" Operator="!=" />\r
+    //       <Property Name="cx" Value="2" Operator="~" />\r
+    //     </Properties>\r
+    //   </Filter>\r
+    // </Filters>\r
+    runner("contains adapter_before or adapter_after, but not both",\r
+           { PropertyList{ Property{ "cx", "0", "!=" }, Property{ "cx", "1", "~" } },\r
+             PropertyList{ Property{ "cx", "0", "!=" }, Property{ "cx", "2", "~" } } },\r
+           PbiFilter::Union(\r
+           {\r
+               PbiFilter::Intersection(\r
+               {\r
+                   PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT, Compare::NOT_EQUAL },\r
+                   PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS }\r
+               }),\r
+               PbiFilter::Intersection(\r
+               {\r
+                   PbiLocalContextFilter{ LocalContextFlags::NO_LOCAL_CONTEXT, Compare::NOT_EQUAL },\r
+                   PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::NOT_CONTAINS }\r
+               })\r
+           }),\r
+           std::vector<size_t>{1,2});\r
+\r
+    // XML: two <Filter> elements, holding\r
+    //   <Property Name="cx" Value="1" Operator="&" />   and\r
+    //   <Property Name="cx" Value="2" Operator="&" />   respectively\r
+    runner("contains adapter_before or adapter_after (two filters)",\r
+           { PropertyList{ Property{ "cx", "1", "&" } },\r
+             PropertyList{ Property{ "cx", "2", "&" } } },\r
+           PbiFilter::Union(\r
+           {\r
+               PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },\r
+               PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::CONTAINS }\r
+           }),\r
+           std::vector<size_t>{1,2,3});\r
+\r
+    // XML (one filter):\r
+    //   <Property Name="cx" Value="1" Operator="&" />\r
+    //   <Property Name="cx" Value="2" Operator="&" />\r
+    runner("adapter_before and adapter_after",\r
+           { PropertyList{ Property{ "cx", "1", "&" }, Property{ "cx", "2", "&" } } },\r
+           PbiFilter::Intersection(\r
+           {\r
+               PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },\r
+               PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::CONTAINS }\r
+           }),\r
+           std::vector<size_t>{3});\r
+\r
+    // XML (one filter):\r
+    //   <Property Name="cx" Value="1" Operator="&" />\r
+    //   <Property Name="cx" Value="2" Operator="~" />\r
+    runner("adapter_before, but no adapter_after",\r
+           { PropertyList{ Property{ "cx", "1", "&" }, Property{ "cx", "2", "~" } } },\r
+           PbiFilter::Intersection(\r
+           {\r
+               PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::CONTAINS },\r
+               PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::NOT_CONTAINS }\r
+           }),\r
+           std::vector<size_t>{1});\r
+\r
+    // XML: <Property Name="cx" Value="1" Operator="~" />\r
+    runner("contains no adapter_before",\r
+           { PropertyList{ Property{ "cx", "1", "~" } } },\r
+           PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS },\r
+           std::vector<size_t>{0,2});\r
+\r
+    // XML (one filter):\r
+    //   <Property Name="cx" Value="1" Operator="~" />\r
+    //   <Property Name="cx" Value="2" Operator="~" />\r
+    runner("contains no adapter_before or adapter_after (two properties)",\r
+           { PropertyList{ Property{ "cx", "1", "~" }, Property{ "cx", "2", "~" } } },\r
+           PbiFilter::Intersection(\r
+           {\r
+               PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE, Compare::NOT_CONTAINS },\r
+               PbiLocalContextFilter{ LocalContextFlags::ADAPTER_AFTER, Compare::NOT_CONTAINS }\r
+           }),\r
+           std::vector<size_t>{0});\r
+\r
+    // XML: <Property Name="cx" Value="3" Operator="~" />\r
+    runner("contains no adapter_before or adapter_after (combined flags)",\r
+           { PropertyList{ Property{ "cx", "3", "~" } } },\r
+           PbiLocalContextFilter{ LocalContextFlags::ADAPTER_BEFORE | LocalContextFlags::ADAPTER_AFTER,\r
+                                  Compare::NOT_CONTAINS },\r
+           std::vector<size_t>{0});\r
+\r
+    { // throws on invalid enum name\r
+\r
+        DataSet dataset = DataSet{ };\r
+        addFilters(dataset, { PropertyList{ Property{ "cx", "DOES_NOT_EXIST", "~" } } });\r
+\r
+        EXPECT_THROW(PbiFilter::FromDataSet(dataset), std::runtime_error);\r
+    }\r
+}\r
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/PbiFilterQuery.h>
+#include <algorithm>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+TEST(PbiFilterQueryTest, QueryOk)
+{
+    // Runs several filters over group/test2.bam and checks, for each one, the
+    // number of records returned and that every record satisfies the filter.
+    const BamFile bamFile{ tests::Data_Dir + string{ "/group/test2.bam" } };
+
+    { // query length >= 500
+        PbiFilterQuery query( PbiQueryLengthFilter{ 500, Compare::GREATER_THAN_EQUAL}, bamFile);
+
+        int numRecords = 0;
+        for (const auto& record : query) {
+            EXPECT_GE((record.QueryEnd() - record.QueryStart()), 500);
+            ++numRecords;
+        }
+        EXPECT_EQ(3, numRecords);
+    }
+    { // all records aligned to reverse strand && pos >= 9200
+        const auto reverseFrom9200 = PbiFilter::Intersection(
+        {
+            PbiAlignedStrandFilter{Strand::REVERSE},
+            PbiReferenceStartFilter{9200, Compare::GREATER_THAN_EQUAL}
+        });
+        PbiFilterQuery query(reverseFrom9200, bamFile);
+
+        int numRecords = 0;
+        for (const auto& record : query) {
+            EXPECT_EQ(Strand::REVERSE, record.AlignedStrand());
+            EXPECT_GE((record.ReferenceStart()), 9200);
+            EXPECT_EQ(string("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/5615_6237"), record.FullName());
+            ++numRecords;
+        }
+        EXPECT_EQ(1, numRecords);
+    }
+    { // all records aligned to forward strand && pos >= 9200
+        const auto forwardFrom9200 = PbiFilter::Intersection(
+        {
+            PbiAlignedStrandFilter{Strand::FORWARD},
+            PbiReferenceStartFilter{9200, Compare::GREATER_THAN_EQUAL}
+        });
+        PbiFilterQuery query(forwardFrom9200, bamFile);
+
+        int numRecords = 0;
+        for (const auto& record : query) {
+            EXPECT_EQ(Strand::FORWARD, record.AlignedStrand());
+            EXPECT_GE((record.ReferenceStart()), 9200);
+            EXPECT_EQ(string("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/2114_2531"), record.FullName());
+            ++numRecords;
+        }
+        EXPECT_EQ(1, numRecords);
+    }
+    { // all records from RG ("b89a4406") with numMatches >= 1200
+        const auto readGroupWithMatches = PbiFilter::Intersection(
+        {
+            PbiReadGroupFilter{"b89a4406"},
+            PbiNumMatchesFilter{1200, Compare::GREATER_THAN_EQUAL}
+        });
+        PbiFilterQuery query(readGroupWithMatches, bamFile);
+
+        int numRecords = 0;
+        for (const auto& record : query) {
+            ++numRecords;
+            EXPECT_EQ(string("b89a4406"), record.ReadGroupId());
+            EXPECT_GE((record.NumMatches()), 1200);
+
+            // the first two records are additionally pinned by name
+            switch (numRecords) {
+                case 1:
+                    EXPECT_EQ(string("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/2579_4055"), record.FullName());
+                    break;
+                case 2:
+                    EXPECT_EQ(string("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/14743/4101_5571"), record.FullName());
+                    break;
+                default:
+                    break;
+            }
+        }
+        EXPECT_EQ(2, numRecords);
+    }
+}
+
+TEST(PbiFilterQueryTest, ZmwRangeFromDatasetOk)
+{
+    // Queries the chunking subread set (three BAM files) with individual
+    // filters, with the filter described by the dataset's own XML, and with
+    // empty/absent filters, checking the record count of each query.
+    const auto expectedMovieName = string{ "m150404_101626_42267_c100807920800000001823174110291514_s1_p0" };
+
+    const DataSet ds(tests::Data_Dir + "/chunking/chunking.subreadset.xml");
+    EXPECT_EQ(3, ds.BamFiles().size());
+
+    { // movie name
+
+        int count = 0;
+        PbiFilterQuery query{ PbiMovieNameFilter{expectedMovieName}, ds };
+        for (const BamRecord& r : query) {
+            EXPECT_EQ(expectedMovieName, r.MovieName());
+            ++count;
+        }
+        EXPECT_EQ(1220, count);
+    }
+
+    { // sequencing chemistries
+
+        // Compare the whole sets. The three-iterator std::equal overload never
+        // looks at the length of its second range: it would pass for an empty
+        // 'chems' and read past the end of 'expected' for a larger one.
+        const set<string> chems{ ds.SequencingChemistries() };
+        const set<string> expected{ "P6-C4" };
+        EXPECT_EQ(expected, chems);
+    }
+
+    { // min ZMW
+
+        int count = 0;
+        PbiFilterQuery query{ PbiZmwFilter{54, Compare::GREATER_THAN}, ds };
+        for (const BamRecord& r : query) {
+            EXPECT_GT(r.HoleNumber(), 54);
+            ++count;
+        }
+        EXPECT_EQ(1220, count);
+    }
+
+    { // max ZMW
+
+        int count = 0;
+        PbiFilterQuery query{ PbiZmwFilter{1816, Compare::LESS_THAN}, ds };
+        for (const BamRecord& r : query) {
+            EXPECT_LT(r.HoleNumber(),1816);
+            ++count;
+        }
+        EXPECT_EQ(150, count);
+    }
+
+    { // put all together, from DataSet XML
+
+        const PbiFilter filter = PbiFilter::FromDataSet(ds);
+        PbiFilterQuery query(filter, ds);
+        int count = 0;
+        for (const BamRecord& r : query) {
+            EXPECT_EQ(expectedMovieName, r.MovieName());
+            const auto zmw = r.HoleNumber();
+            EXPECT_GT(zmw, 54);
+            EXPECT_LT(zmw, 1816);
+            ++count;
+        }
+        EXPECT_EQ(150, count);
+    }
+    { // empty filter object - should return all records from the same dataset
+
+        PbiFilterQuery query(PbiFilter{ }, ds);
+        int count = 0;
+        for (const BamRecord& r : query) {
+            (void)r;
+            ++count;
+        }
+        EXPECT_EQ(1220, count);
+    }
+    { // no <Filters> element present at all
+
+        // note: this 'ds' intentionally shadows the outer dataset
+        const DataSet ds(tests::GeneratedData_Dir + "/chunking_missingfilters.subreadset.xml");
+        const PbiFilter filter = PbiFilter::FromDataSet(ds);
+        PbiFilterQuery query(filter, ds);
+        int count = 0;
+        for (const BamRecord& r : query) {
+            (void)r;
+            ++count;
+        }
+        EXPECT_EQ(1220, count);
+    }
+    { // <Filters> element contains no child <Filter> elements
+
+        // note: this 'ds' intentionally shadows the outer dataset
+        const DataSet ds(tests::GeneratedData_Dir + "/chunking_emptyfilters.subreadset.xml");
+        const PbiFilter filter = PbiFilter::FromDataSet(ds);
+        PbiFilterQuery query(filter, ds);
+        int count = 0;
+        for (const BamRecord& r : query) {
+            (void)r;
+            ++count;
+        }
+        EXPECT_EQ(1220, count);
+    }
+}
+
+TEST(PbiFilterQueryTest, MissingPbiShouldThrow)
+{
+    // Constructing a PbiFilterQuery must throw std::runtime_error when any of
+    // the BAM inputs involved is missing_pbi.bam.
+    const PbiFilter zmwFilter{ PbiZmwFilter{31883} };
+    const string bamWithoutPbi = tests::GeneratedData_Dir + "/missing_pbi.bam";
+    const string bamWithPbi    = tests::Data_Dir + "/polymerase/production.scraps.bam";
+
+    // Builds a dataset from the two resources and expects the query to throw.
+    const auto expectDataSetThrows = [&zmwFilter](const ExternalResource& first,
+                                                  const ExternalResource& second)
+    {
+        DataSet ds;
+        ds.ExternalResources().Add(first);
+        ds.ExternalResources().Add(second);
+        EXPECT_THROW(PbiFilterQuery(zmwFilter, ds), std::runtime_error);
+    };
+
+    // single file, missing PBI
+    EXPECT_THROW(PbiFilterQuery(zmwFilter, bamWithoutPbi), std::runtime_error);
+
+    // from dataset, all missing PBI
+    expectDataSetThrows(ExternalResource("PacBio.SubreadFile.SubreadBamFile", bamWithoutPbi),
+                        ExternalResource("PacBio.SubreadFile.SubreadBamFile", bamWithoutPbi));
+
+    // from dataset, mixed PBI presence
+    expectDataSetThrows(ExternalResource("PacBio.SubreadFile.SubreadBamFile", bamWithoutPbi),
+                        ExternalResource("PacBio.SubreadFile.ScrapsBamFile", bamWithPbi));
+}
+
+TEST(PbiFilterQueryTest, QNameWhitelistFile)
+{
+    // The dataset's own filter is expected to let exactly three records through.
+    const DataSet ds(tests::Data_Dir + "/polymerase/qnameFiltered.subreads.dataset.xml");
+    PbiFilterQuery query{ PbiFilter::FromDataSet(ds), ds };
+
+    int numRecords = 0;
+    for (const BamRecord& record : query) {
+        (void)record;   // only the number of records matters
+        ++numRecords;
+    }
+    EXPECT_EQ(3, numRecords);
+}
+
+TEST(PbiFilterQueryTest, EmptyFiles)
+{
+    // Querying empty.bam with a default-constructed filter must yield no records.
+    const BamFile emptyBam{ tests::Data_Dir + "/empty.bam" };
+    PbiFilterQuery query{ PbiFilter{}, emptyBam };
+
+    size_t numRecords = 0;
+    for (const auto& record : query) {
+        (void)record;   // only the number of records matters
+        ++numRecords;
+    }
+    EXPECT_EQ(0, numRecords);
+}
+
+TEST(PbiFilterQueryTest, BarcodeData)
+{
+    const BamFile file{ tests::Data_Dir + "/phi29.bam" };
+
+    // Number of records of phi29.bam that a query with 'filter' returns.
+    const auto countRecords = [&file](const PbiFilter& filter)
+    {
+        size_t numRecords = 0;
+        PbiFilterQuery query{ filter, file };
+        for (const auto& record : query) {
+            (void)record;
+            ++numRecords;
+        }
+        return numRecords;
+    };
+
+    // bc_quality == 1
+    EXPECT_EQ(120, countRecords(PbiBarcodeQualityFilter{1}));
+
+    // bc_quality != 1
+    EXPECT_EQ(0, countRecords(PbiBarcodeQualityFilter{1, Compare::NOT_EQUAL}));
+
+    // bc_forward == 0
+    EXPECT_EQ(40, countRecords(PbiBarcodeForwardFilter{0}));
+
+    // bc_forward == [0,2]
+    const auto ids = vector<int16_t>{ 0, 2 };
+    EXPECT_EQ(80, countRecords(PbiBarcodeForwardFilter{ ids }));
+
+    // bc_reverse != 0
+    EXPECT_EQ(80, countRecords(PbiBarcodeReverseFilter{0, Compare::NOT_EQUAL}));
+}
+
+TEST(PbiFilterQueryTest, BarcodeQualityFromXml)
+{
+ // Builds PbiFilters from two in-memory SubreadSet XML documents that differ
+ // only in the operator of their single "bq" property ("=" vs "!="), then
+ // applies each filter to phi29.bam and checks the number of records returned.
+ // Note: the queries run on phi29.bam directly, not on the ExternalResource
+ // that the XML names; only the <Filters> section of the XML is exercised.
+
+// dataset XML with the filter property: bq = 1
+const string xml_all = R"_XML_(
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c"
+ TimeStampedName="subreadset_150304_231155"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Name="DataSet_SubreadSet"
+ Tags=""
+ Version="3.0.0"
+ CreatedAt="2015-01-27T09:00:01">
+<pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5194"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="bq" Operator="=" Value="1"/>
+ </pbbase:Properties>
+ </pbds:Filter>
+</pbds:Filters>
+</pbds:SubreadSet>
+)_XML_";
+
+// same dataset XML, but with the filter property: bq != 1
+const string xml_none = R"_XML_(
+<?xml version="1.0" encoding="utf-8"?>
+<pbds:SubreadSet
+ xmlns="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:pbbase="http://pacificbiosciences.com/PacBioBaseDataModel.xsd"
+ xmlns:pbsample="http://pacificbiosciences.com/PacBioSampleInfo.xsd"
+ xmlns:pbmeta="http://pacificbiosciences.com/PacBioCollectionMetadata.xsd"
+ xmlns:pbds="http://pacificbiosciences.com/PacBioDatasets.xsd"
+ xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd"
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe519c"
+ TimeStampedName="subreadset_150304_231155"
+ MetaType="PacBio.DataSet.SubreadSet"
+ Name="DataSet_SubreadSet"
+ Tags=""
+ Version="3.0.0"
+ CreatedAt="2015-01-27T09:00:01">
+<pbbase:ExternalResources>
+ <pbbase:ExternalResource
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5193"
+ TimeStampedName="subread_bam_150304_231155"
+ MetaType="PacBio.SubreadFile.SubreadBamFile"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam">
+ <pbbase:FileIndices>
+ <pbbase:FileIndex
+ UniqueId="b095d0a3-94b8-4918-b3af-a3f81bbe5194"
+ TimeStampedName="bam_index_150304_231155"
+ MetaType="PacBio.Index.PacBioIndex"
+ ResourceId="m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam.pbi"/>
+ </pbbase:FileIndices>
+ </pbbase:ExternalResource>
+</pbbase:ExternalResources>
+<pbds:Filters>
+ <pbds:Filter>
+ <pbbase:Properties>
+ <pbbase:Property Name="bq" Operator="!=" Value="1"/>
+ </pbbase:Properties>
+ </pbds:Filter>
+</pbds:Filters>
+</pbds:SubreadSet>
+)_XML_";
+
+ // BAM file that both generated filters are applied to
+ const BamFile file{ tests::Data_Dir + "/phi29.bam" };
+
+ { // filter allows all records
+ const DataSet ds = DataSet::FromXml(xml_all);
+ const PbiFilterQuery query { PbiFilter::FromDataSet(ds), file };
+ size_t count = 0;
+ for (const auto& r : query) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(120, count);
+ }
+ { // filter allows no records
+ const DataSet ds = DataSet::FromXml(xml_none);
+ const PbiFilterQuery query { PbiFilter::FromDataSet(ds), file };
+ size_t count = 0;
+ for (const auto& r : query) {
+ (void)r;
+ ++count;
+ }
+ EXPECT_EQ(0, count);
+ }
+}
+
+
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/../../src/Pulse2BaseCache.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+TEST(Pulse2BaseCacheTest, CountsDetectedInConstructor)
+{
+    // 'basesOnly' is 'allPulses' with its lowercase calls left out.
+    const string allPulses = "ACccTTAGtTCAtG";
+    const string basesOnly = "ACTTAGTCAG";
+
+    const Pulse2BaseCache cache{ allPulses };
+
+    EXPECT_EQ(allPulses.size(), cache.NumPulses());
+    EXPECT_EQ(basesOnly.size(), cache.NumBases());
+}
+
+TEST(Pulse2BaseCacheTest, RemovesSquashedPulsesFromString)
+{
+    // A cache built from the pulse calls is applied to the calls themselves
+    // and to a second per-pulse string of the same length.
+    const string allPulses = "ACccTTAGtTCAtG";
+    const string altLabels = "-G--A--T--AC--";
+
+    const string expectedCalls  = "ACTTAGTCAG";
+    const string expectedLabels = "-GA--T-AC-";
+
+    const Pulse2BaseCache cache{ allPulses };
+
+    EXPECT_EQ(expectedCalls,  cache.RemoveSquashedPulses(allPulses));
+    EXPECT_EQ(expectedLabels, cache.RemoveSquashedPulses(altLabels));
+}
+
+TEST(Pulse2BaseCacheTest, RemovesSquashedPulsesFromVector)
+{
+    // Same trimming as for strings, applied to a per-pulse numeric vector.
+    const string allPulses = "ACccTTAGtTCAtG";
+    const vector<uint16_t> pkMean{ 5,4,2,2,3,8,8,8,4,7,7,7,3,4 };
+    const vector<uint16_t> expectedPkMean{ 5,4,3,8,8,8,7,7,7,4 };
+
+    const Pulse2BaseCache cache{ allPulses };
+
+    EXPECT_EQ(expectedPkMean, cache.RemoveSquashedPulses(pkMean));
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Yuan Li
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/QNameQuery.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Paths of the BAM fixtures located in the "group" folder of the test data directory.
+static const string dataDir = tests::Data_Dir + "/group/";
+static const string test1fn = dataDir + "test1.bam";
+static const string test2fn = dataDir + "test2.bam";
+static const string test3fn = dataDir + "test3.bam";
+
+// Runs a QNameQuery over `fn`, binding each yielded vector<BamRecord> by const
+// reference, and checks that the sequence of vector sizes equals `expected`.
+// Any exception thrown while querying is reported as a test failure.
+static void TestQNameQuery(const string& fn, const vector<int>& expected)
+{
+    EXPECT_NO_THROW(
+    {
+        QNameQuery query(fn);
+        vector<int> groupSizes;
+        for (const vector<BamRecord>& group : query) {
+            groupSizes.push_back(group.size());
+        }
+        EXPECT_EQ(expected, groupSizes);
+    });
+}
+
+// Same check as the const variant, but binds each yielded vector<BamRecord> by
+// non-const reference while iterating the query.
+static void TestNoneConstQNameQuery(const string& fn, const vector<int>& expected)
+{
+    EXPECT_NO_THROW(
+    {
+        QNameQuery query(fn);
+        vector<int> groupSizes;
+        for (vector<BamRecord>& group : query) {
+            groupSizes.push_back(group.size());
+        }
+        EXPECT_EQ(expected, groupSizes);
+    });
+}
+
+// Checks the per-iteration record counts for each fixture, using both the
+// const and the non-const iteration helpers.
+TEST(QNameQueryTest, CountQSizes)
+{
+    // test case 1 has exactly one bamRecord.
+    {
+        const vector<int> expected{ 1 };
+        TestQNameQuery(test1fn, expected);
+        TestNoneConstQNameQuery(test1fn, expected);
+    }
+
+    // test case 2 has bamRecords of four subreads.
+    {
+        const vector<int> expected{ 1, 1, 1, 1 };
+        TestQNameQuery(test2fn, expected);
+        TestNoneConstQNameQuery(test2fn, expected);
+    }
+
+    // test case 3 mixes counts of two and one.
+    {
+        const vector<int> expected{ 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1 };
+        TestQNameQuery(test3fn, expected);
+        TestNoneConstQNameQuery(test3fn, expected);
+    }
+}
+
+
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/QualityValues.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// A default-constructed QualityValue compares equal to 0 and renders as '!'.
+TEST(QualityValueTest, DefaultsOk)
+{
+    const QualityValue defaultQv;
+
+    EXPECT_EQ(0, defaultQv);
+    EXPECT_EQ('!', defaultQv.Fastq());
+}
+
+// Numeric construction: values up to 93 are kept as given, and larger inputs
+// are expected to read back as 93 ('~' in FASTQ form).
+TEST(QualityValueTest, FromNumber)
+{
+    // inputs within range
+    const QualityValue zero(0);
+    EXPECT_EQ(0, zero);
+    EXPECT_EQ('!', zero.Fastq());
+
+    const QualityValue thirtyThree(33);
+    EXPECT_EQ(33, thirtyThree);
+    EXPECT_EQ('B', thirtyThree.Fastq());
+
+    const QualityValue valid(42);
+    EXPECT_EQ(42, valid);
+    EXPECT_EQ('K', valid.Fastq());
+
+    const QualityValue max(93);
+    EXPECT_EQ(93, max);
+    EXPECT_EQ('~', max.Fastq());
+
+    // inputs above the maximum
+    const QualityValue tooHigh(94);
+    EXPECT_EQ(93, tooHigh);
+    EXPECT_EQ('~', tooHigh.Fastq());
+
+    const QualityValue wayTooHigh(INT8_MAX);
+    EXPECT_EQ(93, wayTooHigh);
+    EXPECT_EQ('~', wayTooHigh.Fastq());
+}
+
+// Each FASTQ character converts to the expected numeric quality value.
+TEST(QualityValueTest, FromFastq)
+{
+    const QualityValue zero = QualityValue::FromFastq('!');
+    EXPECT_EQ(0, zero);
+
+    const QualityValue thirtyThree = QualityValue::FromFastq('B');
+    EXPECT_EQ(33, thirtyThree);
+
+    const QualityValue valid = QualityValue::FromFastq('K');
+    EXPECT_EQ(42, valid);
+
+    const QualityValue max = QualityValue::FromFastq('~');
+    EXPECT_EQ(93, max);
+}
+
+// A default-constructed QualityValues is empty and yields an empty FASTQ string.
+TEST(QualityValuesTest, Default)
+{
+    const QualityValues emptyQvs;
+
+    EXPECT_TRUE(emptyQvs.empty());
+    EXPECT_EQ(string(), emptyQvs.Fastq());
+}
+
+// Appending numeric values one at a time produces the matching FASTQ string.
+TEST(QualityValuesTest, FromNumbers)
+{
+    const vector<uint8_t> values = { 93, 93, 93, 42, 42, 33, 33, 0, 0 };
+    const string fastqString = "~~~KKBB!!";
+
+    QualityValues qvs;
+    for (auto value : values) {
+        qvs.push_back(value);
+    }
+
+    EXPECT_EQ(fastqString, qvs.Fastq());
+}
+
+// Parsing a FASTQ string yields one quality value per character, in order.
+TEST(QualityValuesTest, FromFastq)
+{
+    const vector<uint8_t> values = { 93, 93, 93, 42, 42, 33, 33, 0, 0 };
+    const string fastqString = "~~~KKBB!!";
+
+    const QualityValues& parsed = QualityValues::FromFastq(fastqString);
+
+    // the length must agree with both the input string and the expected values
+    EXPECT_EQ(fastqString.size(), parsed.size());
+    EXPECT_EQ(values.size(), parsed.size());
+
+    const size_t numChars = fastqString.size();
+    for (size_t pos = 0; pos < numChars; ++pos) {
+        EXPECT_EQ(values.at(pos), parsed.at(pos));
+    }
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/ReadAccuracyQuery.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Runs ReadAccuracyQuery over test2.bam with two thresholds and checks, for
+// each, that every returned record satisfies that threshold and that the
+// number of returned records matches the expectation.
+TEST(ReadAccuracyQueryTest, QueryOk)
+{
+    const auto bamFile = BamFile{ tests::Data_Dir + string{ "/group/test2.bam" } };
+
+    {
+        // threshold 0.901: four records are expected
+        int count = 0;
+        ReadAccuracyQuery query(0.901, Compare::GREATER_THAN_EQUAL, bamFile);
+        for (const auto& r: query) {
+            ++count;
+            EXPECT_GE(r.ReadAccuracy(), 0.901);
+        }
+        EXPECT_EQ(4, count);
+    }
+    {
+        // threshold 0.95: no records are expected. The per-record check uses
+        // the same threshold as the query, so an unexpected record is judged
+        // against the filter that was actually requested.
+        int count = 0;
+        ReadAccuracyQuery query(0.95, Compare::GREATER_THAN_EQUAL, bamFile);
+        for (const auto& r: query) {
+            ++count;
+            EXPECT_GE(r.ReadAccuracy(), 0.95);
+        }
+        EXPECT_EQ(0, count);
+    }
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett, Lance Hepler
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/ReadGroupInfo.h>
+#include <string>
+#include <vector>
+using namespace PacBio::BAM;
+using namespace std;
+
+// Constructing from a movie name and read type yields the expected read group ID.
+TEST(ReadGroupInfoTest, IdFromMovieNameAndReadType)
+{
+    ReadGroupInfo readGroup("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0",
+                            "HQREGION");
+
+    EXPECT_EQ("00082ba1", readGroup.Id());
+}
+
+// Setting the IPD codec should register the IPD base feature under tag "ip"
+// and the codec should read back unchanged.
+TEST(ReadGroupInfoTest, FrameCodecSetOk)
+{
+    ReadGroupInfo readGroup("test");
+    readGroup.IpdCodec(FrameCodec::V1);
+
+    EXPECT_TRUE(readGroup.HasBaseFeature(BaseFeature::IPD));
+    EXPECT_EQ("ip", readGroup.BaseFeatureTag(BaseFeature::IPD));
+    EXPECT_EQ(FrameCodec::V1, readGroup.IpdCodec());
+}
+
+// For each known chemistry, checks the static triple lookup for every listed
+// (binding kit, sequencing kit, basecaller version) combination, then checks
+// the same result through a ReadGroupInfo configured with one such triple.
+TEST(ReadGroupInfoTest, SequencingChemistryOk)
+{
+    { // P6-C4
+        const string chem = "P6-C4";
+        for (const char* bindingKit : { "100356300", "100372700" }) {
+            for (const char* sequencingKit : { "100356200", "100612400" }) {
+                for (const char* version : { "2.1", "2.3" }) {
+                    EXPECT_EQ(chem, ReadGroupInfo::SequencingChemistryFromTriple(bindingKit, sequencingKit, version));
+                }
+            }
+        }
+
+        ReadGroupInfo rg("dummy");
+        rg.BindingKit("100356300")
+          .SequencingKit("100356200")
+          .BasecallerVersion("2.1");
+        EXPECT_EQ(chem, rg.SequencingChemistry());
+    }
+
+    { // S/P1-C1/beta
+        const string chem = "S/P1-C1/beta";
+        for (const char* version : { "3.0", "3.1" }) {
+            EXPECT_EQ(chem, ReadGroupInfo::SequencingChemistryFromTriple("100-619-300", "100-620-000", version));
+        }
+
+        ReadGroupInfo rg("dummy");
+        rg.BindingKit("100-619-300")
+          .SequencingKit("100-620-000")
+          .BasecallerVersion("3.0");
+        EXPECT_EQ(chem, rg.SequencingChemistry());
+    }
+
+    { // S/P1-C1.1 (Echidna)
+        const string chem = "S/P1-C1.1";
+        for (const char* version : { "3.1", "3.2", "3.3" }) {
+            EXPECT_EQ(chem, ReadGroupInfo::SequencingChemistryFromTriple("100-619-300", "100-867-300", version));
+        }
+
+        ReadGroupInfo rg("dummy");
+        rg.BindingKit("100-619-300")
+          .SequencingKit("100-867-300")
+          .BasecallerVersion("3.1");
+        EXPECT_EQ(chem, rg.SequencingChemistry());
+    }
+
+    { // S/P1-C1.2 (Flea)
+        const string chem = "S/P1-C1.2";
+        for (const char* version : { "3.1", "3.2", "3.3" }) {
+            EXPECT_EQ(chem, ReadGroupInfo::SequencingChemistryFromTriple("100-619-300", "100-902-100", version));
+        }
+
+        ReadGroupInfo rg("dummy");
+        rg.BindingKit("100-619-300")
+          .SequencingKit("100-902-100")
+          .BasecallerVersion("3.1");
+        EXPECT_EQ(chem, rg.SequencingChemistry());
+    }
+
+    { // S/P1-C1.3 (Goat)
+        const string chem = "S/P1-C1.3";
+        for (const char* version : { "3.2", "3.3" }) {
+            EXPECT_EQ(chem, ReadGroupInfo::SequencingChemistryFromTriple("100-619-300", "100-972-200", version));
+        }
+
+        ReadGroupInfo rg("dummy");
+        rg.BindingKit("100-619-300")
+          .SequencingKit("100-972-200")
+          .BasecallerVersion("3.3");
+        EXPECT_EQ(chem, rg.SequencingChemistry());
+    }
+}
+
+// Checks that SequencingChemistry() throws InvalidSequencingChemistryException
+// for an unrecognized (binding kit, sequencing kit, basecaller version) triple,
+// and that the exception carries the three values that were configured.
+TEST(ReadGroupInfoTest, SequencingChemistryThrowsOnBadTriple)
+{
+    // check that we actually throw
+    {
+        ReadGroupInfo rg("BAD");
+        rg.BindingKit("100372700")
+          .SequencingKit("100-619-400")
+          .BasecallerVersion("2.0");
+        EXPECT_THROW(rg.SequencingChemistry(), InvalidSequencingChemistryException);
+    }
+
+    // now check thrown contents
+    try {
+        ReadGroupInfo rg("BAD");
+        rg.BindingKit("100372700")
+          .SequencingKit("100-619-400")
+          .BasecallerVersion("2.0");
+
+        // the lookup itself must be invoked here; without this call the
+        // handler below is unreachable and its checks never run
+        (void)rg.SequencingChemistry();
+        ADD_FAILURE() << "expected InvalidSequencingChemistryException was not thrown";
+    } catch (InvalidSequencingChemistryException& e) {
+        EXPECT_EQ(string("100372700"), e.BindingKit());
+        EXPECT_EQ(string("100-619-400"), e.SequencingKit());
+        EXPECT_EQ(string("2.0"), e.BasecallerVersion());
+    }
+}
+
+// Exercises SequencingChemistry() with unusual basecaller version strings and
+// inspects the exception raised for each.
+// NOTE(review): both sections assert only inside their catch handlers, so a
+// call that does not throw passes without checking anything - confirm that
+// this is intended.
+TEST(ReadGroupInfoTest, BasecallerVersion)
+{
+    // too short
+    try {
+        ReadGroupInfo shortVersionRg("dummy");
+        shortVersionRg.BindingKit("100-619-300")
+                      .SequencingKit("100-867-300")
+                      .BasecallerVersion("3");
+        (void)shortVersionRg.SequencingChemistry();
+
+    } catch (std::runtime_error& err) {
+        EXPECT_EQ(string("basecaller version too short: 3"), string(err.what()));
+    }
+
+    // initial implementation assumed single digit version numbers:
+    //     const string ver{ basecallerVersion.substr(0, 3) };
+    // So '3.299.dummy' would incorrectly be interpreted as (OK) '3.2'.
+
+    try {
+        ReadGroupInfo multiDigitRg("dummy");
+        multiDigitRg.BindingKit("100-619-300")
+                    .SequencingKit("100-867-300")
+                    .BasecallerVersion("3.199.dummy");
+        (void)multiDigitRg.SequencingChemistry();
+
+    } catch (InvalidSequencingChemistryException& err) {
+        EXPECT_EQ("100-619-300", err.BindingKit());
+        EXPECT_EQ("100-867-300", err.SequencingKit());
+        EXPECT_EQ("3.199.dummy", err.BasecallerVersion());
+    }
+    //EXPECT_THROW(rg.SequencingChemistry(), InvalidSequencingChemistryException);
+}
+
+// After ClearBaseFeatures(), none of the previously registered base features
+// should be reported as present.
+TEST(ReadGroupInfoTest, ClearBaseFeatures)
+{
+    ReadGroupInfo readGroup("test");
+    readGroup.BaseFeatureTag(BaseFeature::DELETION_QV, "dq");
+    readGroup.BaseFeatureTag(BaseFeature::DELETION_TAG, "dt");
+    readGroup.BaseFeatureTag(BaseFeature::INSERTION_QV, "iq");
+    readGroup.BaseFeatureTag(BaseFeature::MERGE_QV, "mq");
+    readGroup.BaseFeatureTag(BaseFeature::SUBSTITUTION_QV, "sq");
+
+    // sanity check that registration took effect
+    EXPECT_TRUE(readGroup.HasBaseFeature(BaseFeature::DELETION_QV));
+    EXPECT_EQ("dq", readGroup.BaseFeatureTag(BaseFeature::DELETION_QV));
+
+    readGroup.ClearBaseFeatures();
+
+    const BaseFeature registered[] = {
+        BaseFeature::DELETION_QV,
+        BaseFeature::DELETION_TAG,
+        BaseFeature::INSERTION_QV,
+        BaseFeature::MERGE_QV,
+        BaseFeature::SUBSTITUTION_QV
+    };
+    for (const BaseFeature feature : registered) {
+        EXPECT_FALSE(readGroup.HasBaseFeature(feature));
+    }
+}
+
+// RemoveBaseFeature() should drop only the requested feature and leave the
+// other registered features in place.
+TEST(ReadGroupInfoTest, RemoveBaseFeature)
+{
+    ReadGroupInfo readGroup("test");
+    readGroup.BaseFeatureTag(BaseFeature::DELETION_QV, "dq");
+    readGroup.BaseFeatureTag(BaseFeature::DELETION_TAG, "dt");
+    readGroup.BaseFeatureTag(BaseFeature::INSERTION_QV, "iq");
+    readGroup.BaseFeatureTag(BaseFeature::MERGE_QV, "mq");
+    readGroup.BaseFeatureTag(BaseFeature::SUBSTITUTION_QV, "sq");
+
+    // sanity check that registration took effect
+    EXPECT_TRUE(readGroup.HasBaseFeature(BaseFeature::DELETION_QV));
+    EXPECT_EQ("dq", readGroup.BaseFeatureTag(BaseFeature::DELETION_QV));
+
+    readGroup.RemoveBaseFeature(BaseFeature::DELETION_QV);
+    EXPECT_FALSE(readGroup.HasBaseFeature(BaseFeature::DELETION_QV));
+
+    const BaseFeature untouched[] = {
+        BaseFeature::DELETION_TAG,
+        BaseFeature::INSERTION_QV,
+        BaseFeature::MERGE_QV,
+        BaseFeature::SUBSTITUTION_QV
+    };
+    for (const BaseFeature feature : untouched) {
+        EXPECT_TRUE(readGroup.HasBaseFeature(feature));
+    }
+}
+
+
--- /dev/null
+// Copyright (c) 2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/SamWriter.h>
+#include <fstream>
+#include <iostream>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Writes a header-only SAM file and checks that the file contents equal the
+// header text the writer was constructed with.
+TEST(SamWriterTest, HeaderOk)
+{
+    // header text expected to appear verbatim in the output
+    const string hdrText =
+        "@HD\tVN:1.1\tSO:unknown\tpb:3.0.3\n"
+        "@RG\tID:6002b307\tPL:PACBIO\tDS:READTYPE=SUBREAD;BINDINGKIT=100-619-300;"
+        "SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100\t"
+        "PU:test\tPM:SEQUEL\n";
+
+    EXPECT_NO_THROW(
+    {
+        const string samFn = tests::GeneratedData_Dir + "/samwriter_hdr_only.sam";
+
+        // scope the writer so it is destroyed before the file is read back
+        {
+            const BamHeader header(hdrText);
+            SamWriter writer(samFn, header);
+            (void)writer;
+        }
+
+        // read the whole file and compare it with the header text
+        {
+            ifstream in(samFn);
+            const string contents((istreambuf_iterator<char>(in)),
+                                  istreambuf_iterator<char>());
+            EXPECT_EQ(hdrText, contents);
+        }
+
+        // clean up
+        remove(samFn.c_str());
+    });
+}
+
+// Writes a header plus one unmapped record to a SAM file and checks the first
+// three lines of the output: the two header lines and the record line.
+TEST(SamWriterTest, SingleRecordOk)
+{
+    // header: two lines, each newline-terminated in the full text
+    const string hdrLine1 = "@HD\tVN:1.1\tSO:unknown\tpb:3.0.3";
+    const string hdrLine2 =
+        "@RG\tID:6002b307\tPL:PACBIO\tDS:READTYPE=SUBREAD;BINDINGKIT=100-619-300;"
+        "SEQUENCINGKIT=100-619-400;BASECALLERVERSION=3.0;FRAMERATEHZ=100\t"
+        "PU:test\tPM:SEQUEL";
+    const string hdrText = hdrLine1 + "\n" + hdrLine2 + "\n";
+    const BamHeader inputHeader(hdrText);
+
+    // record: core fields
+    BamRecord record(inputHeader);
+    record.Impl().Name("test/100/0_5");
+    record.Impl().SetSequenceAndQualities("ACGTC", 5, "@@@@@");
+    record.Impl().CigarData("");
+    record.Impl().Bin(0);
+    record.Impl().Flag(0);
+    record.Impl().InsertSize(0);
+    record.Impl().MapQuality(0);
+    record.Impl().MatePosition(-1);
+    record.Impl().MateReferenceId(-1);
+    record.Impl().Position(-1);
+    record.Impl().ReferenceId(-1);
+    record.Impl().SetMapped(false);
+
+    // record: tags
+    TagCollection recordTags;
+    recordTags["zm"] = static_cast<int32_t>(100);
+    recordTags["qs"] = static_cast<Position>(0);
+    recordTags["qe"] = static_cast<Position>(5);
+    recordTags["np"] = static_cast<int32_t>(1);
+    recordTags["rq"] = static_cast<float>(0.6);
+    recordTags["RG"] = std::string{ "6002b307" };
+    recordTags["sn"] = vector<float>{0.2f,0.2f,0.2f,0.2f};
+    record.Impl().Tags(recordTags);
+
+    const string expectedSamRecord =
+        "test/100/0_5\t4\t*\t0\t0\t*\t*\t0\t0\tACGTC\t@@@@@\tRG:Z:6002b307\t"
+        "np:i:1\tqe:i:5\tqs:i:0\trq:f:0.6\tsn:B:f,0.2,0.2,0.2,0.2\tzm:i:100";
+
+    EXPECT_NO_THROW(
+    {
+        const string samFn = tests::GeneratedData_Dir + "/samwriter_hdr_and_record.sam";
+
+        // scope the writer so it is destroyed before the file is read back
+        {
+            SamWriter writer(samFn, inputHeader);
+            writer.Write(record);
+        }
+
+        // check header & record
+        {
+            ifstream in(samFn);
+            string lines[3];
+            for (string& line : lines) {
+                std::getline(in, line);
+            }
+            EXPECT_EQ(hdrLine1, lines[0]);
+            EXPECT_EQ(hdrLine2, lines[1]);
+            EXPECT_EQ(expectedSamRecord, lines[2]);
+        }
+
+        // cleanup
+        remove(samFn.c_str());
+    });
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/../../src/SequenceUtils.h>
+#include <string>
+#include <vector>
+#include <climits>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// Checks Complement() for every upper-case letter A-Z.
+TEST(SequenceUtilsTest, ComplementChar)
+{
+    // Each row pairs an input letter with the value Complement() is expected
+    // to return for it; '\0' marks letters for which a null result is expected.
+    struct ComplementCase
+    {
+        char input;
+        char expected;
+    };
+
+    const ComplementCase cases[] = {
+        { 'A', 'T'  },
+        { 'B', 'V'  },
+        { 'C', 'G'  },
+        { 'D', 'H'  },
+        { 'E', '\0' },
+        { 'F', '\0' },
+        { 'G', 'C'  },
+        { 'H', 'D'  },
+        { 'I', '\0' },
+        { 'J', '\0' },
+        { 'K', 'M'  },
+        { 'L', '\0' },
+        { 'M', 'K'  },
+        { 'N', 'N'  },
+        { 'O', '\0' },
+        { 'P', '\0' },
+        { 'Q', '\0' },
+        { 'R', 'Y'  },
+        { 'S', 'S'  },
+        { 'T', 'A'  },
+        { 'U', 'A'  },
+        { 'V', 'B'  },
+        { 'W', 'W'  },
+        { 'X', '\0' },
+        { 'Y', 'R'  },
+        { 'Z', '\0' }
+    };
+
+    for (const ComplementCase& testCase : cases) {
+        EXPECT_EQ(testCase.expected, Complement(testCase.input));
+    }
+}
+
+// ReverseComplement() rewrites its argument in place; compare the result with
+// the expected reverse-complemented sequence.
+TEST(SequenceUtilsTest, ReverseComplement)
+{
+    const string expected = "CGCCGGGATATAT";
+    string sequence = "ATATATCCCGGCG";
+
+    ReverseComplement(sequence);
+
+    EXPECT_EQ(expected, sequence);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/../../src/StringUtils.h>
+#include <string>
+#include <vector>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// Splitting a tab-delimited string should yield one token per field, in order.
+TEST(StringUtilsTest, BasicSplit)
+{
+    const string test = "foo\tbar\tbaz";
+    const vector<string> tokens = internal::Split(test, '\t');
+
+    // ASSERT (not EXPECT) so that a wrong token count stops the test here
+    // instead of letting at() throw std::out_of_range below; the unsigned
+    // literal matches the unsigned type returned by size().
+    ASSERT_EQ(3u, tokens.size());
+
+    // EXPECT_EQ prints both values when a comparison fails
+    EXPECT_EQ("foo", tokens.at(0));
+    EXPECT_EQ("bar", tokens.at(1));
+    EXPECT_EQ("baz", tokens.at(2));
+}
+
+// Two adjacent delimiters should produce an empty token rather than being
+// collapsed into one separator.
+TEST(StringUtilsTest, SplitKeepsEmptyTokens)
+{
+    const string test = "foo\tbar\t\tbaz";
+    const vector<string> tokens = internal::Split(test, '\t');
+
+    // ASSERT (not EXPECT) so that a wrong token count stops the test here
+    // instead of letting at() throw std::out_of_range below; the unsigned
+    // literal matches the unsigned type returned by size().
+    ASSERT_EQ(4u, tokens.size());
+
+    // EXPECT_EQ prints both values when a comparison fails
+    EXPECT_EQ("foo", tokens.at(0));
+    EXPECT_EQ("bar", tokens.at(1));
+    EXPECT_EQ("", tokens.at(2));
+    EXPECT_EQ("baz", tokens.at(3));
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/SubreadLengthQuery.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Runs SubreadLengthQuery over test2.bam with increasing minimum lengths and
+// checks both the per-record lengths and the number of records returned.
+TEST(SubreadLengthQueryTest, QueryOk)
+{
+    const auto bamFile = BamFile{ tests::Data_Dir + string{ "/group/test2.bam" } };
+
+    // Runs one ">= minLength" query, checks every returned record against
+    // that minimum, and returns how many records were seen.
+    const auto countAtLeast = [&bamFile](const int minLength) -> int {
+        int numRecords = 0;
+        SubreadLengthQuery query(minLength, Compare::GREATER_THAN_EQUAL, bamFile);
+        for (const auto& record : query) {
+            ++numRecords;
+            EXPECT_GE((record.QueryEnd() - record.QueryStart()), minLength);
+        }
+        return numRecords;
+    };
+
+    EXPECT_EQ(3, countAtLeast(500));
+    EXPECT_EQ(2, countAtLeast(1000));
+    EXPECT_EQ(0, countAtLeast(5000));
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <boost/type_traits/is_convertible.hpp>
+#include <gtest/gtest.h>
+#include <pbbam/BamTagCodec.h>
+#include <pbbam/TagCollection.h>
+#include <pbbam/SamTagCodec.h>
+#include <algorithm>
+#include <iostream>
+#include <map>
+#include <string>
+
+#include <typeinfo>
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+// Builds a Tag from every supported scalar, string and array source type and
+// checks the TagDataType each one reports. Also checks that char-like values
+// constructed with TagModifier::ASCII_CHAR read back through ToAscii().
+TEST(TagTest, TagConstruction)
+{
+    // --- scalars ---
+    const int8_t int8Value = 0;
+    const Tag int8Tag(int8Value);
+    EXPECT_TRUE(int8Tag.Type() == TagDataType::INT8);
+
+    const uint8_t uint8Value = 0;
+    const Tag uint8Tag(uint8Value);
+    EXPECT_TRUE(uint8Tag.Type() == TagDataType::UINT8);
+
+    const int16_t int16Value = 0;
+    const Tag int16Tag(int16Value);
+    EXPECT_TRUE(int16Tag.Type() == TagDataType::INT16);
+
+    const uint16_t uint16Value = 0;
+    const Tag uint16Tag(uint16Value);
+    EXPECT_TRUE(uint16Tag.Type() == TagDataType::UINT16);
+
+    const int32_t int32Value = 0;
+    const Tag int32Tag(int32Value);
+    EXPECT_TRUE(int32Tag.Type() == TagDataType::INT32);
+
+    const uint32_t uint32Value = 0;
+    const Tag uint32Tag(uint32Value);
+    EXPECT_TRUE(uint32Tag.Type() == TagDataType::UINT32);
+
+    const float floatValue = 0.0f;
+    const Tag floatTag(floatValue);
+    EXPECT_TRUE(floatTag.Type() == TagDataType::FLOAT);
+
+    // --- string ---
+    const string stringValue = "";
+    const Tag stringTag(stringValue);
+    EXPECT_TRUE(stringTag.Type() == TagDataType::STRING);
+
+    // --- arrays (all empty) ---
+    const vector<int8_t> int8Array;
+    const Tag int8ArrayTag(int8Array);
+    EXPECT_TRUE(int8ArrayTag.Type() == TagDataType::INT8_ARRAY);
+
+    const vector<uint8_t> uint8Array;
+    const Tag uint8ArrayTag(uint8Array);
+    EXPECT_TRUE(uint8ArrayTag.Type() == TagDataType::UINT8_ARRAY);
+
+    const vector<int16_t> int16Array;
+    const Tag int16ArrayTag(int16Array);
+    EXPECT_TRUE(int16ArrayTag.Type() == TagDataType::INT16_ARRAY);
+
+    const vector<uint16_t> uint16Array;
+    const Tag uint16ArrayTag(uint16Array);
+    EXPECT_TRUE(uint16ArrayTag.Type() == TagDataType::UINT16_ARRAY);
+
+    const vector<int32_t> int32Array;
+    const Tag int32ArrayTag(int32Array);
+    EXPECT_TRUE(int32ArrayTag.Type() == TagDataType::INT32_ARRAY);
+
+    const vector<uint32_t> uint32Array;
+    const Tag uint32ArrayTag(uint32Array);
+    EXPECT_TRUE(uint32ArrayTag.Type() == TagDataType::UINT32_ARRAY);
+
+    const vector<float> floatArray;
+    const Tag floatArrayTag(floatArray);
+    EXPECT_TRUE(floatArrayTag.Type() == TagDataType::FLOAT_ARRAY);
+
+    // --- ASCII characters ---
+    const signed char signedChar = 'A';
+    const Tag signedCharTag(signedChar, TagModifier::ASCII_CHAR);
+    EXPECT_TRUE(signedCharTag.ToAscii() == 'A');
+
+    const unsigned char unsignedChar = 'A';
+    const Tag unsignedCharTag(unsignedChar, TagModifier::ASCII_CHAR);
+    EXPECT_TRUE(unsignedCharTag.ToAscii() == 'A');
+}
+
+// Copy-constructs a Tag of every supported data type and checks that the copy
+// compares equal to the tag it was copied from.
+TEST(TagTest, CopyAndCompare)
+{
+    // Copies the given tag and expects the duplicate to equal the source.
+    const auto expectCopyEqual = [](const Tag& source) {
+        const Tag duplicate = source;
+        EXPECT_EQ(source, duplicate);
+    };
+
+    // scalar sources
+    const int8_t int8Value = 0;
+    const uint8_t uint8Value = 0;
+    const int16_t int16Value = 0;
+    const uint16_t uint16Value = 0;
+    const int32_t int32Value = 0;
+    const uint32_t uint32Value = 0;
+    const float floatValue = 0.0f;
+    const string stringValue = "";
+
+    // array sources (all empty)
+    const vector<int8_t> int8Array;
+    const vector<uint8_t> uint8Array;
+    const vector<int16_t> int16Array;
+    const vector<uint16_t> uint16Array;
+    const vector<int32_t> int32Array;
+    const vector<uint32_t> uint32Array;
+    const vector<float> floatArray;
+
+    const Tag int8Tag(int8Value);
+    const Tag uint8Tag(uint8Value);
+    const Tag int16Tag(int16Value);
+    const Tag uint16Tag(uint16Value);
+    const Tag int32Tag(int32Value);
+    const Tag uint32Tag(uint32Value);
+    const Tag floatTag(floatValue);
+    const Tag stringTag(stringValue);
+    const Tag int8ArrayTag(int8Array);
+    const Tag uint8ArrayTag(uint8Array);
+    const Tag int16ArrayTag(int16Array);
+    const Tag uint16ArrayTag(uint16Array);
+    const Tag int32ArrayTag(int32Array);
+    const Tag uint32ArrayTag(uint32Array);
+    const Tag floatArrayTag(floatArray);
+
+    expectCopyEqual(int8Tag);
+    expectCopyEqual(uint8Tag);
+    expectCopyEqual(int16Tag);
+    expectCopyEqual(uint16Tag);
+    expectCopyEqual(int32Tag);
+    expectCopyEqual(uint32Tag);
+    expectCopyEqual(floatTag);
+    expectCopyEqual(stringTag);
+    expectCopyEqual(int8ArrayTag);
+    expectCopyEqual(uint8ArrayTag);
+    expectCopyEqual(int16ArrayTag);
+    expectCopyEqual(uint16ArrayTag);
+    expectCopyEqual(int32ArrayTag);
+    expectCopyEqual(uint32ArrayTag);
+    expectCopyEqual(floatArrayTag);
+}
+
+// A default-constructed Tag is expected to be null/invalid and to match none
+// of the numeric, string or array categories.
+TEST(TagTest, Type_None)
+{
+    const Tag emptyTag;
+
+    // identity of the empty tag
+    EXPECT_TRUE(emptyTag.IsNull());
+    EXPECT_TRUE(emptyTag.Type() == TagDataType::INVALID);
+    EXPECT_TRUE(emptyTag.Typename() == "none");
+
+    // belongs to no value category
+    EXPECT_FALSE(emptyTag.IsArray());
+    EXPECT_FALSE(emptyTag.IsString());
+    EXPECT_FALSE(emptyTag.IsNumeric());
+}
+
+// Round-trips an int8_t through Tag and checks its type name and predicates.
+TEST(TagTest, Type_Int8)
+{
+    const int8_t v = -42;
+    const Tag tag(v);
+
+    // Initialized up front: EXPECT_NO_THROW is non-fatal, so if ToInt8() throws
+    // the test keeps running and the final EXPECT_EQ must not read an
+    // indeterminate value.
+    int8_t v2 = 0;
+    EXPECT_NO_THROW(v2 = tag.ToInt8());
+
+    EXPECT_TRUE(tag.Type() == TagDataType::INT8);
+    EXPECT_TRUE(tag.Typename() == "int8_t");
+    EXPECT_TRUE(tag.IsInt8());
+
+    EXPECT_TRUE(tag.IsSignedInt());
+    EXPECT_TRUE(tag.IsIntegral());
+    EXPECT_TRUE(tag.IsNumeric());
+
+    EXPECT_FALSE(tag.IsUnsignedInt());
+    EXPECT_FALSE(tag.IsNull());
+    EXPECT_FALSE(tag.IsFloat());
+    EXPECT_FALSE(tag.IsString());
+    EXPECT_FALSE(tag.IsArray());
+
+    // value survives the round trip
+    EXPECT_EQ(v, v2);
+}
+
+// Round-trips a uint8_t through Tag and checks its type name and predicates.
+TEST(TagTest, Type_UInt8)
+{
+    const uint8_t v = 42;
+    const Tag tag(v);
+
+    // Initialized up front: EXPECT_NO_THROW is non-fatal, so if ToUInt8() throws
+    // the test keeps running and the final EXPECT_EQ must not read an
+    // indeterminate value.
+    uint8_t v2 = 0;
+    EXPECT_NO_THROW(v2 = tag.ToUInt8());
+
+    EXPECT_TRUE(tag.Type() == TagDataType::UINT8);
+    EXPECT_TRUE(tag.Typename() == "uint8_t");
+    EXPECT_TRUE(tag.IsUInt8());
+
+    EXPECT_TRUE(tag.IsUnsignedInt());
+    EXPECT_TRUE(tag.IsIntegral());
+    EXPECT_TRUE(tag.IsNumeric());
+
+    EXPECT_FALSE(tag.IsSignedInt());
+    EXPECT_FALSE(tag.IsNull());
+    EXPECT_FALSE(tag.IsFloat());
+    EXPECT_FALSE(tag.IsString());
+    EXPECT_FALSE(tag.IsArray());
+
+    // value survives the round trip
+    EXPECT_EQ(v, v2);
+}
+
+// Checks ASCII-character tags built from every char-like source type, using
+// both construct-then-modify and direct construction with
+// TagModifier::ASCII_CHAR. Also checks that a char cannot be constructed with
+// the HEX_STRING modifier.
+TEST(TagTest, Type_Ascii)
+{
+    const char plainChar = '$';
+    const signed char signedChar = '$';
+    const unsigned char unsignedChar = '$';
+    const uint8_t uint8Value = 65;  // read back as 'A' below
+    const int8_t int8Value = 66;    // read back as 'B' below
+
+    // Checks shared by every ASCII tag: it carries the modifier, still counts
+    // as integral/numeric, and yields the expected character.
+    const auto expectAscii = [](const Tag& tag, const char expectedChar) {
+        EXPECT_TRUE(tag.HasModifier(TagModifier::ASCII_CHAR));
+        EXPECT_TRUE(tag.IsIntegral());
+        EXPECT_TRUE(tag.IsNumeric());
+        EXPECT_EQ(expectedChar, tag.ToAscii());
+    };
+
+    { // old style: construct-then-modify
+
+        Tag plainCharTag(plainChar);
+        Tag signedCharTag(signedChar);
+        Tag unsignedCharTag(unsignedChar);
+        Tag uint8Tag(uint8Value);
+        Tag int8Tag(int8Value);
+
+        plainCharTag.Modifier(TagModifier::ASCII_CHAR);
+        signedCharTag.Modifier(TagModifier::ASCII_CHAR);
+        unsignedCharTag.Modifier(TagModifier::ASCII_CHAR);
+        uint8Tag.Modifier(TagModifier::ASCII_CHAR);
+        int8Tag.Modifier(TagModifier::ASCII_CHAR);
+
+        expectAscii(plainCharTag, '$');
+        expectAscii(signedCharTag, '$');
+        expectAscii(unsignedCharTag, '$');
+        expectAscii(uint8Tag, 'A');
+        expectAscii(int8Tag, 'B');
+    }
+
+    { // new style: construct directly as ASCII
+
+        const Tag plainCharTag(plainChar, TagModifier::ASCII_CHAR);
+        const Tag signedCharTag(signedChar, TagModifier::ASCII_CHAR);
+        const Tag unsignedCharTag(unsignedChar, TagModifier::ASCII_CHAR);
+        const Tag uint8Tag(uint8Value, TagModifier::ASCII_CHAR);
+        const Tag int8Tag(int8Value, TagModifier::ASCII_CHAR);
+
+        expectAscii(plainCharTag, '$');
+        expectAscii(signedCharTag, '$');
+        expectAscii(unsignedCharTag, '$');
+        expectAscii(uint8Tag, 'A');
+        expectAscii(int8Tag, 'B');
+    }
+
+    // check invalid constructs: a char value may not be flagged as a hex string
+    EXPECT_THROW(Tag('A', TagModifier::HEX_STRING), std::runtime_error);
+}
+
+// Round-trips an int16_t through Tag and checks its type name and predicates.
+TEST(TagTest, Type_Int16)
+{
+    const int16_t v = -42;
+    const Tag tag(v);
+
+    // Initialized up front: EXPECT_NO_THROW is non-fatal, so if ToInt16() throws
+    // the test keeps running and the final EXPECT_EQ must not read an
+    // indeterminate value.
+    int16_t v2 = 0;
+    EXPECT_NO_THROW(v2 = tag.ToInt16());
+
+    EXPECT_TRUE(tag.Type() == TagDataType::INT16);
+    EXPECT_TRUE(tag.Typename() == "int16_t");
+    EXPECT_TRUE(tag.IsInt16());
+    EXPECT_TRUE(tag.IsSignedInt());
+    EXPECT_TRUE(tag.IsIntegral());
+    EXPECT_TRUE(tag.IsNumeric());
+
+    EXPECT_FALSE(tag.IsUnsignedInt());
+    EXPECT_FALSE(tag.IsNull());
+    EXPECT_FALSE(tag.IsFloat());
+    EXPECT_FALSE(tag.IsString());
+    EXPECT_FALSE(tag.IsArray());
+
+    // value survives the round trip
+    EXPECT_EQ(v, v2);
+}
+
+// Round-trips a uint16_t through Tag and checks its type name and predicates.
+TEST(TagTest, Type_UInt16)
+{
+    const uint16_t v = 42;
+    const Tag tag(v);
+
+    // Initialized up front: EXPECT_NO_THROW is non-fatal, so if ToUInt16()
+    // throws the test keeps running and the final EXPECT_EQ must not read an
+    // indeterminate value.
+    uint16_t v2 = 0;
+    EXPECT_NO_THROW(v2 = tag.ToUInt16());
+
+    EXPECT_TRUE(tag.Type() == TagDataType::UINT16);
+    EXPECT_TRUE(tag.Typename() == "uint16_t");
+    EXPECT_TRUE(tag.IsUInt16());
+    EXPECT_TRUE(tag.IsUnsignedInt());
+    EXPECT_TRUE(tag.IsIntegral());
+    EXPECT_TRUE(tag.IsNumeric());
+
+    EXPECT_FALSE(tag.IsSignedInt());
+    EXPECT_FALSE(tag.IsNull());
+    EXPECT_FALSE(tag.IsFloat());
+    EXPECT_FALSE(tag.IsString());
+    EXPECT_FALSE(tag.IsArray());
+
+    // value survives the round trip
+    EXPECT_EQ(v, v2);
+}
+
+// Round-trips an int32_t through Tag and checks its type name and predicates.
+TEST(TagTest, Type_Int32)
+{
+    const int32_t v = -42;
+    const Tag tag(v);
+
+    // Initialized up front: EXPECT_NO_THROW is non-fatal, so if ToInt32() throws
+    // the test keeps running and the final EXPECT_EQ must not read an
+    // indeterminate value.
+    int32_t v2 = 0;
+    EXPECT_NO_THROW(v2 = tag.ToInt32());
+
+    EXPECT_TRUE(tag.Type() == TagDataType::INT32);
+    EXPECT_TRUE(tag.Typename() == "int32_t");
+    EXPECT_TRUE(tag.IsInt32());
+    EXPECT_TRUE(tag.IsSignedInt());
+    EXPECT_TRUE(tag.IsIntegral());
+    EXPECT_TRUE(tag.IsNumeric());
+
+    EXPECT_FALSE(tag.IsUnsignedInt());
+    EXPECT_FALSE(tag.IsNull());
+    EXPECT_FALSE(tag.IsFloat());
+    EXPECT_FALSE(tag.IsString());
+    EXPECT_FALSE(tag.IsArray());
+
+    // value survives the round trip
+    EXPECT_EQ(v, v2);
+}
+
+// Round-trips a uint32_t through Tag and checks its type name and predicates.
+TEST(TagTest, Type_UInt32)
+{
+    const uint32_t v = 42;
+    const Tag tag(v);
+
+    // Initialized up front: EXPECT_NO_THROW is non-fatal, so if ToUInt32()
+    // throws the test keeps running and the final EXPECT_EQ must not read an
+    // indeterminate value.
+    uint32_t v2 = 0;
+    EXPECT_NO_THROW(v2 = tag.ToUInt32());
+
+    EXPECT_TRUE(tag.Type() == TagDataType::UINT32);
+    EXPECT_TRUE(tag.Typename() == "uint32_t");
+    EXPECT_TRUE(tag.IsUInt32());
+    EXPECT_TRUE(tag.IsUnsignedInt());
+    EXPECT_TRUE(tag.IsIntegral());
+    EXPECT_TRUE(tag.IsNumeric());
+
+    EXPECT_FALSE(tag.IsSignedInt());
+    EXPECT_FALSE(tag.IsNull());
+    EXPECT_FALSE(tag.IsFloat());
+    EXPECT_FALSE(tag.IsString());
+    EXPECT_FALSE(tag.IsArray());
+
+    // value survives the round trip
+    EXPECT_EQ(v, v2);
+}
+
+// Round-trips a float through Tag and checks its type name and predicates.
+TEST(TagTest, Type_Float)
+{
+    // float literal: avoids an implicit double-to-float conversion
+    const float v = 3.141f;
+    const Tag tag(v);
+
+    // Initialized up front: EXPECT_NO_THROW is non-fatal, so if ToFloat() throws
+    // the test keeps running and the final EXPECT_EQ must not read an
+    // indeterminate value.
+    float v2 = 0.0f;
+    EXPECT_NO_THROW(v2 = tag.ToFloat());
+
+    EXPECT_TRUE(tag.Type() == TagDataType::FLOAT);
+    EXPECT_TRUE(tag.Typename() == "float");
+    EXPECT_TRUE(tag.IsFloat());
+    EXPECT_TRUE(tag.IsNumeric());
+
+    EXPECT_FALSE(tag.IsNull());
+    EXPECT_FALSE(tag.IsIntegral());
+    EXPECT_FALSE(tag.IsString());
+    EXPECT_FALSE(tag.IsArray());
+
+    // exact comparison is intended: the stored value should come back unchanged
+    EXPECT_EQ(v, v2);
+}
+
+// Round-trips a std::string through Tag, checks the predicates for plain and
+// hex-modified string tags, and checks that a string cannot be constructed
+// with the ASCII_CHAR modifier.
+TEST(TagTest, Type_String)
+{
+    const string original = "foo_who";
+    const Tag stringTag(original);
+
+    string roundTripped;
+    EXPECT_NO_THROW(roundTripped = stringTag.ToString());
+    EXPECT_EQ(original, roundTripped);
+
+    EXPECT_TRUE(stringTag.IsString());
+    EXPECT_TRUE(stringTag.Type() == TagDataType::STRING);
+    EXPECT_TRUE(stringTag.Typename() == "string");
+
+    EXPECT_FALSE(stringTag.IsArray());
+    EXPECT_FALSE(stringTag.IsNumeric());
+    EXPECT_FALSE(stringTag.IsNull());
+
+    // "Hex format" string: still a string tag, but carries the HEX_STRING modifier
+    const Tag hexTag("DEADBEEF", TagModifier::HEX_STRING);
+    EXPECT_TRUE(hexTag.HasModifier(TagModifier::HEX_STRING));
+    EXPECT_TRUE(hexTag.IsString());
+    EXPECT_TRUE(hexTag.Type() == TagDataType::STRING);
+    EXPECT_TRUE(hexTag.Typename() == "string");
+    EXPECT_FALSE(hexTag.IsArray());
+    EXPECT_FALSE(hexTag.IsNumeric());
+    EXPECT_FALSE(hexTag.IsNull());
+
+    // check invalid constructs
+    EXPECT_THROW(Tag("DEADBEEF", TagModifier::ASCII_CHAR), std::runtime_error);
+}
+
+// Round-trips a vector<int8_t> through Tag and checks its array predicates.
+TEST(TagTest, Type_Int8Array)
+{
+    const vector<int8_t> original = { -42, 100, 0 };
+    const Tag arrayTag(original);
+
+    // what the tag is
+    EXPECT_TRUE(arrayTag.IsArray());
+    EXPECT_TRUE(arrayTag.IsIntegralArray());
+    EXPECT_TRUE(arrayTag.IsSignedArray());
+    EXPECT_TRUE(arrayTag.IsInt8Array());
+    EXPECT_TRUE(arrayTag.Typename() == "vector<int8_t>");
+    EXPECT_TRUE(arrayTag.Type() == TagDataType::INT8_ARRAY);
+
+    // what the tag is not
+    EXPECT_FALSE(arrayTag.IsNumeric());
+    EXPECT_FALSE(arrayTag.IsNull());
+    EXPECT_FALSE(arrayTag.IsString());
+    EXPECT_FALSE(arrayTag.IsFloat());
+
+    // contents survive the round trip
+    vector<int8_t> roundTripped;
+    EXPECT_NO_THROW(roundTripped = arrayTag.ToInt8Array());
+    EXPECT_EQ(original, roundTripped);
+}
+
+// Round-trips a vector<uint8_t> through Tag and checks its array predicates.
+TEST(TagTest, Type_UInt8Array)
+{
+    const vector<uint8_t> original = { 42, 200, 0 };
+    const Tag arrayTag(original);
+
+    // what the tag is
+    EXPECT_TRUE(arrayTag.IsArray());
+    EXPECT_TRUE(arrayTag.IsIntegralArray());
+    EXPECT_TRUE(arrayTag.IsUnsignedArray());
+    EXPECT_TRUE(arrayTag.IsUInt8Array());
+    EXPECT_TRUE(arrayTag.Typename() == "vector<uint8_t>");
+    EXPECT_TRUE(arrayTag.Type() == TagDataType::UINT8_ARRAY);
+
+    // what the tag is not
+    EXPECT_FALSE(arrayTag.IsNumeric());
+    EXPECT_FALSE(arrayTag.IsNull());
+    EXPECT_FALSE(arrayTag.IsString());
+    EXPECT_FALSE(arrayTag.IsFloat());
+
+    // contents survive the round trip
+    vector<uint8_t> roundTripped;
+    EXPECT_NO_THROW(roundTripped = arrayTag.ToUInt8Array());
+    EXPECT_EQ(original, roundTripped);
+}
+
+// Round-trips a vector<int16_t> through Tag and checks its array predicates.
+TEST(TagTest, Type_Int16Array)
+{
+    const vector<int16_t> original = { 42, -300, 0 };
+    const Tag arrayTag(original);
+
+    // what the tag is
+    EXPECT_TRUE(arrayTag.IsArray());
+    EXPECT_TRUE(arrayTag.IsIntegralArray());
+    EXPECT_TRUE(arrayTag.IsSignedArray());
+    EXPECT_TRUE(arrayTag.IsInt16Array());
+    EXPECT_TRUE(arrayTag.Typename() == "vector<int16_t>");
+    EXPECT_TRUE(arrayTag.Type() == TagDataType::INT16_ARRAY);
+
+    // what the tag is not
+    EXPECT_FALSE(arrayTag.IsNumeric());
+    EXPECT_FALSE(arrayTag.IsNull());
+    EXPECT_FALSE(arrayTag.IsString());
+    EXPECT_FALSE(arrayTag.IsFloat());
+
+    // contents survive the round trip
+    vector<int16_t> roundTripped;
+    EXPECT_NO_THROW(roundTripped = arrayTag.ToInt16Array());
+    EXPECT_EQ(original, roundTripped);
+}
+
+// Round-trips a vector<uint16_t> through Tag and checks its array predicates.
+TEST(TagTest, Type_UInt16Array)
+{
+    const vector<uint16_t> v = { 42, 300, 0 };
+    const Tag tag(v);
+
+    vector<uint16_t> v2;
+    EXPECT_NO_THROW(v2 = tag.ToUInt16Array());
+
+    EXPECT_TRUE(tag.Type() == TagDataType::UINT16_ARRAY);
+    EXPECT_TRUE(tag.Typename() == "vector<uint16_t>");
+    EXPECT_TRUE(tag.IsUInt16Array());
+    EXPECT_TRUE(tag.IsUnsignedArray());
+    EXPECT_TRUE(tag.IsIntegralArray());
+    EXPECT_TRUE(tag.IsArray());
+
+    EXPECT_FALSE(tag.IsFloat());
+    EXPECT_FALSE(tag.IsString());
+    EXPECT_FALSE(tag.IsNull());
+    EXPECT_FALSE(tag.IsNumeric());
+
+    // contents survive the round trip
+    EXPECT_EQ(v, v2);
+}
+
+// Round-trips a vector<int32_t> through Tag and checks its array predicates.
+TEST(TagTest, Type_Int32Array)
+{
+    const vector<int32_t> original = { 42, -300, 0 };
+    const Tag arrayTag(original);
+
+    // what the tag is
+    EXPECT_TRUE(arrayTag.IsArray());
+    EXPECT_TRUE(arrayTag.IsIntegralArray());
+    EXPECT_TRUE(arrayTag.IsSignedArray());
+    EXPECT_TRUE(arrayTag.IsInt32Array());
+    EXPECT_TRUE(arrayTag.Typename() == "vector<int32_t>");
+    EXPECT_TRUE(arrayTag.Type() == TagDataType::INT32_ARRAY);
+
+    // what the tag is not
+    EXPECT_FALSE(arrayTag.IsNumeric());
+    EXPECT_FALSE(arrayTag.IsNull());
+    EXPECT_FALSE(arrayTag.IsString());
+    EXPECT_FALSE(arrayTag.IsFloat());
+
+    // contents survive the round trip
+    vector<int32_t> roundTripped;
+    EXPECT_NO_THROW(roundTripped = arrayTag.ToInt32Array());
+    EXPECT_EQ(original, roundTripped);
+}
+
+// Round-trips a vector<uint32_t> through Tag and checks its array predicates.
+TEST(TagTest, Type_UInt32Array)
+{
+    const vector<uint32_t> original = { 42, 300, 0 };
+    const Tag arrayTag(original);
+
+    // what the tag is
+    EXPECT_TRUE(arrayTag.IsArray());
+    EXPECT_TRUE(arrayTag.IsIntegralArray());
+    EXPECT_TRUE(arrayTag.IsUnsignedArray());
+    EXPECT_TRUE(arrayTag.IsUInt32Array());
+    EXPECT_TRUE(arrayTag.Typename() == "vector<uint32_t>");
+    EXPECT_TRUE(arrayTag.Type() == TagDataType::UINT32_ARRAY);
+
+    // what the tag is not
+    EXPECT_FALSE(arrayTag.IsNumeric());
+    EXPECT_FALSE(arrayTag.IsNull());
+    EXPECT_FALSE(arrayTag.IsString());
+    EXPECT_FALSE(arrayTag.IsFloat());
+
+    // contents survive the round trip
+    vector<uint32_t> roundTripped;
+    EXPECT_NO_THROW(roundTripped = arrayTag.ToUInt32Array());
+    EXPECT_EQ(original, roundTripped);
+}
+
+// Round-trips a vector<float> through Tag and checks its array predicates.
+TEST(TagTest, Type_FloatArray)
+{
+    const vector<float> original = { 1.1f, 1.2f, 1.3f };
+    const Tag arrayTag(original);
+
+    // what the tag is
+    EXPECT_TRUE(arrayTag.IsArray());
+    EXPECT_TRUE(arrayTag.IsFloatArray());
+    EXPECT_TRUE(arrayTag.Typename() == "vector<float>");
+    EXPECT_TRUE(arrayTag.Type() == TagDataType::FLOAT_ARRAY);
+
+    // what the tag is not
+    EXPECT_FALSE(arrayTag.IsNumeric());
+    EXPECT_FALSE(arrayTag.IsNull());
+    EXPECT_FALSE(arrayTag.IsString());
+    EXPECT_FALSE(arrayTag.IsFloat());
+    EXPECT_FALSE(arrayTag.IsIntegralArray());
+
+    // contents survive the round trip
+    vector<float> roundTripped;
+    EXPECT_NO_THROW(roundTripped = arrayTag.ToFloatArray());
+    EXPECT_EQ(original, roundTripped);
+}
+
+// For every supported data type, converting a Tag back to the type it was
+// constructed from must not throw. All conversions run inside one
+// EXPECT_NO_THROW, so the first throwing conversion ends the sequence.
+TEST(TagTest, CastBackToOriginalOk)
+{
+    // scalar sources and their tags
+    int8_t int8Value = 0;
+    uint8_t uint8Value = 0;
+    int16_t int16Value = 0;
+    uint16_t uint16Value = 0;
+    int32_t int32Value = 0;
+    uint32_t uint32Value = 0;
+    float floatValue = 0.0f;
+    string stringValue = "";
+
+    Tag int8Tag(int8Value);
+    Tag uint8Tag(uint8Value);
+    Tag int16Tag(int16Value);
+    Tag uint16Tag(uint16Value);
+    Tag int32Tag(int32Value);
+    Tag uint32Tag(uint32Value);
+    Tag floatTag(floatValue);
+    Tag stringTag(stringValue);
+
+    // array sources (all empty) and their tags
+    vector<int8_t> int8Array;
+    vector<uint8_t> uint8Array;
+    vector<int16_t> int16Array;
+    vector<uint16_t> uint16Array;
+    vector<int32_t> int32Array;
+    vector<uint32_t> uint32Array;
+    vector<float> floatArray;
+
+    Tag int8ArrayTag(int8Array);
+    Tag uint8ArrayTag(uint8Array);
+    Tag int16ArrayTag(int16Array);
+    Tag uint16ArrayTag(uint16Array);
+    Tag int32ArrayTag(int32Array);
+    Tag uint32ArrayTag(uint32Array);
+    Tag floatArrayTag(floatArray);
+
+    // Assigning back into the source variables also confirms at compile time
+    // that each accessor's result is assignable to the original type.
+    EXPECT_NO_THROW({
+        int8Value   = int8Tag.ToInt8();
+        uint8Value  = uint8Tag.ToUInt8();
+        int16Value  = int16Tag.ToInt16();
+        uint16Value = uint16Tag.ToUInt16();
+        int32Value  = int32Tag.ToInt32();
+        uint32Value = uint32Tag.ToUInt32();
+        floatValue  = floatTag.ToFloat();
+        stringValue = stringTag.ToString();
+        int8Array   = int8ArrayTag.ToInt8Array();
+        uint8Array  = uint8ArrayTag.ToUInt8Array();
+        int16Array  = int16ArrayTag.ToInt16Array();
+        uint16Array = uint16ArrayTag.ToUInt16Array();
+        int32Array  = int32ArrayTag.ToInt32Array();
+        uint32Array = uint32ArrayTag.ToUInt32Array();
+        floatArray  = floatArrayTag.ToFloatArray();
+    });
+}
+
+// ToInt8() on an int32 tag must succeed for values inside [INT8_MIN, INT8_MAX]
+// and throw for out-of-range values and for float, string and array tags.
+TEST(TagTest, ConvertToInt8)
+{
+    // in range
+    Tag zeroTag(static_cast<int32_t>(0));
+    Tag minTag(static_cast<int32_t>(INT8_MIN));
+    Tag normalTag(static_cast<int32_t>(42));
+    Tag maxTag(static_cast<int32_t>(INT8_MAX));
+
+    // out of range
+    Tag underflowTag(static_cast<int32_t>(INT8_MIN - 1));
+    Tag overflowTag(static_cast<int32_t>(INT8_MAX + 1));
+
+    // wrong category
+    Tag floatTag(static_cast<float>(3.14));
+    Tag stringTag(string("foo"));
+    Tag arrayTag(vector<int8_t>({ 1, 2, 3 }));
+
+    // allowed
+    EXPECT_NO_THROW({
+        zeroTag.ToInt8();
+        minTag.ToInt8();
+        normalTag.ToInt8();
+        maxTag.ToInt8();
+    });
+
+    // not allowed
+    EXPECT_THROW(underflowTag.ToInt8(), std::exception);
+    EXPECT_THROW(overflowTag.ToInt8(), std::exception);
+    EXPECT_THROW(floatTag.ToInt8(), std::exception);
+    EXPECT_THROW(stringTag.ToInt8(), std::exception);
+    EXPECT_THROW(arrayTag.ToInt8(), std::exception);
+}
+
+// ToUInt8() on an int32 tag must succeed for values inside [0, UINT8_MAX] and
+// throw for negative or too-large values and for float, string and array tags.
+TEST(TagTest, ConvertToUInt8)
+{
+    // in range
+    Tag zeroTag(static_cast<int32_t>(0));
+    Tag normalTag(static_cast<int32_t>(42));
+    Tag maxTag(static_cast<int32_t>(UINT8_MAX));
+
+    // out of range
+    Tag negativeTag(static_cast<int32_t>(-1));
+    Tag overflowTag(static_cast<int32_t>(UINT8_MAX + 1));
+
+    // wrong category
+    Tag floatTag(static_cast<float>(3.14));
+    Tag stringTag(string("foo"));
+    Tag arrayTag(vector<uint8_t>({ 1, 2, 3 }));
+
+    // allowed
+    EXPECT_NO_THROW({
+        zeroTag.ToUInt8();
+        normalTag.ToUInt8();
+        maxTag.ToUInt8();
+    });
+
+    // not allowed
+    EXPECT_THROW(negativeTag.ToUInt8(), std::exception);
+    EXPECT_THROW(overflowTag.ToUInt8(), std::exception);
+    EXPECT_THROW(floatTag.ToUInt8(), std::exception);
+    EXPECT_THROW(stringTag.ToUInt8(), std::exception);
+    EXPECT_THROW(arrayTag.ToUInt8(), std::exception);
+}
+
+// ToInt16() on an int32 tag must succeed for values inside
+// [INT16_MIN, INT16_MAX] and throw for out-of-range values and for float,
+// string and array tags.
+TEST(TagTest, ConvertToInt16)
+{
+    // in range
+    Tag zeroTag(static_cast<int32_t>(0));
+    Tag minTag(static_cast<int32_t>(INT16_MIN));
+    Tag normalTag(static_cast<int32_t>(42));
+    Tag maxTag(static_cast<int32_t>(INT16_MAX));
+
+    // out of range
+    Tag underflowTag(static_cast<int32_t>(INT16_MIN - 1));
+    Tag overflowTag(static_cast<int32_t>(INT16_MAX + 1));
+
+    // wrong category
+    Tag floatTag(static_cast<float>(3.14));
+    Tag stringTag(string("foo"));
+    Tag arrayTag(vector<int16_t>({ 1, 2, 3 }));
+
+    // allowed
+    EXPECT_NO_THROW({
+        zeroTag.ToInt16();
+        minTag.ToInt16();
+        normalTag.ToInt16();
+        maxTag.ToInt16();
+    });
+
+    // not allowed
+    EXPECT_THROW(underflowTag.ToInt16(), std::exception);
+    EXPECT_THROW(overflowTag.ToInt16(), std::exception);
+    EXPECT_THROW(floatTag.ToInt16(), std::exception);
+    EXPECT_THROW(stringTag.ToInt16(), std::exception);
+    EXPECT_THROW(arrayTag.ToInt16(), std::exception);
+}
+
+// ToUInt16() on an int32 tag must succeed for values inside [0, UINT16_MAX]
+// and throw for negative or too-large values and for float, string and array
+// tags.
+TEST(TagTest, ConvertToUInt16)
+{
+    // in range
+    Tag zeroTag(static_cast<int32_t>(0));
+    Tag normalTag(static_cast<int32_t>(42));
+    Tag maxTag(static_cast<int32_t>(UINT16_MAX));
+
+    // out of range
+    Tag negativeTag(static_cast<int32_t>(-1));
+    Tag overflowTag(static_cast<int32_t>(UINT16_MAX + 1));
+
+    // wrong category
+    Tag floatTag(static_cast<float>(3.14));
+    Tag stringTag(string("foo"));
+    Tag arrayTag(vector<uint16_t>({ 1, 2, 3 }));
+
+    // allowed
+    EXPECT_NO_THROW({
+        zeroTag.ToUInt16();
+        normalTag.ToUInt16();
+        maxTag.ToUInt16();
+    });
+
+    // not allowed
+    EXPECT_THROW(negativeTag.ToUInt16(), std::exception);
+    EXPECT_THROW(overflowTag.ToUInt16(), std::exception);
+    EXPECT_THROW(floatTag.ToUInt16(), std::exception);
+    EXPECT_THROW(stringTag.ToUInt16(), std::exception);
+    EXPECT_THROW(arrayTag.ToUInt16(), std::exception);
+}
+
+// ToInt32() must succeed for the full int32 range and throw for float, string
+// and array tags. Out-of-range inputs cannot be tested (see note below).
+TEST(TagTest, ConvertToInt32)
+{
+    // in range
+    Tag zeroTag(static_cast<int32_t>(0));
+    Tag minTag(static_cast<int32_t>(INT32_MIN));
+    Tag normalTag(static_cast<int32_t>(42));
+    Tag maxTag(static_cast<int32_t>(INT32_MAX));
+
+    // wrong category
+    Tag floatTag(static_cast<float>(3.14));
+    Tag stringTag(string("foo"));
+    Tag arrayTag(vector<int32_t>({ 1, 2, 3 }));
+
+    // no 64-bit ctors - will not compile
+    //
+    // Tag underflow(int64_t(INT32_MIN-1));
+    // Tag overflow(int64_t(INT32_MAX+1));
+
+    // allowed
+    EXPECT_NO_THROW({
+        zeroTag.ToInt32();
+        minTag.ToInt32();
+        normalTag.ToInt32();
+        maxTag.ToInt32();
+    });
+
+    // not allowed
+    EXPECT_THROW(floatTag.ToInt32(), std::exception);
+    EXPECT_THROW(stringTag.ToInt32(), std::exception);
+    EXPECT_THROW(arrayTag.ToInt32(), std::exception);
+}
+
+// ToUInt32() must succeed for non-negative values up to UINT32_MAX and throw
+// for a negative int32 tag and for float, string and array tags. Values above
+// UINT32_MAX cannot be tested (see note below).
+TEST(TagTest, ConvertToUInt32)
+{
+    // in range (note: the maximum is stored as a uint32 tag, the rest as int32)
+    Tag zeroTag(static_cast<int32_t>(0));
+    Tag normalTag(static_cast<int32_t>(42));
+    Tag maxTag(static_cast<uint32_t>(UINT32_MAX));
+
+    // out of range
+    Tag negativeTag(static_cast<int32_t>(-1));
+
+    // wrong category
+    Tag floatTag(static_cast<float>(3.14));
+    Tag stringTag(string("foo"));
+    Tag arrayTag(vector<uint32_t>({ 1, 2, 3 }));
+
+    // no 64-bit ctors - will not compile
+    //
+    // Tag overflow(int64_t(UINT32_MAX+1));
+
+    // allowed
+    EXPECT_NO_THROW({
+        zeroTag.ToUInt32();
+        normalTag.ToUInt32();
+        maxTag.ToUInt32();
+    });
+
+    // not allowed
+    EXPECT_THROW(negativeTag.ToUInt32(), std::exception);
+    EXPECT_THROW(floatTag.ToUInt32(), std::exception);
+    EXPECT_THROW(stringTag.ToUInt32(), std::exception);
+    EXPECT_THROW(arrayTag.ToUInt32(), std::exception);
+}
+
+// A default-constructed TagCollection holds no tags.
+TEST(TagCollectionTest, DefaultConstruction)
+{
+    TagCollection collection;
+    EXPECT_FALSE(collection.Contains("XY"));
+    EXPECT_TRUE(collection.empty());
+}
+
+// Adds an int, a string and a hex-string tag through operator[] and checks
+// that they can be found and read back unchanged.
+TEST(TagCollectionTest, AddSimpleTags)
+{
+    const int32_t intValue = -42;
+    const string strValue = "foo";
+    const string hexStrValue = "1abc75";
+
+    TagCollection tags;
+    tags["ST"] = strValue;
+    tags["XY"] = intValue;
+    tags["HX"] = hexStrValue;
+    tags["HX"].Modifier(TagModifier::HEX_STRING);
+
+    // Unsigned literal: comparing a signed int literal against the container's
+    // size triggers a sign-compare warning inside the gtest comparison helper.
+    // NOTE(review): assumes size() returns an unsigned type (std-container
+    // style interface) - confirm against TagCollection.
+    EXPECT_EQ(3u, tags.size());
+    EXPECT_TRUE(tags.Contains("XY"));
+    EXPECT_TRUE(tags.Contains("ST"));
+    EXPECT_TRUE(tags.Contains("HX"));
+    EXPECT_FALSE(tags.Contains("ZZ"));
+
+    EXPECT_TRUE( tags["XY"].ToInt32() == intValue );
+    EXPECT_TRUE( tags["ST"].ToString() == strValue );
+    EXPECT_TRUE( tags["HX"].ToString() == hexStrValue );
+    EXPECT_TRUE( tags["HX"].HasModifier(TagModifier::HEX_STRING) );
+}
+
+// Decodes a tab-separated SAM tag string and checks that every tag is present
+// with the expected value (and, for HX, the hex-string modifier).
+TEST(SamTagCodecTest, DecodeTest)
+{
+    string tagString;
+    tagString.append("HX:H:1abc75");
+    tagString.append("\t");
+    tagString.append("ST:Z:foo");
+    tagString.append("\t");
+    tagString.append("VC:B:i,42,-100,37,2048");
+    tagString.append("\t");
+    tagString.append("XY:i:-42");
+
+    // (A hand-built "expected" TagCollection used to be constructed here but
+    // was never compared against; the per-tag checks below are the assertions.)
+    TagCollection tags = SamTagCodec::Decode(tagString);
+
+    EXPECT_TRUE(tags.Contains("ST"));
+    EXPECT_TRUE(tags.Contains("HX"));
+    EXPECT_TRUE(tags.Contains("XY"));
+    EXPECT_TRUE(tags.Contains("VC"));
+
+    EXPECT_EQ(string("foo"), tags["ST"].ToString());
+    EXPECT_TRUE(tags["HX"].HasModifier(TagModifier::HEX_STRING));
+    EXPECT_EQ(string("1abc75"), tags["HX"].ToString());
+    // -42 fits in int8_t, so the narrowing accessor is expected to succeed
+    EXPECT_EQ(static_cast<int8_t>(-42), tags["XY"].ToInt8());
+    EXPECT_EQ(vector<int32_t>( { 42, -100, 37, 2048 } ), tags["VC"].ToInt32Array());
+}
+
+// Encodes a TagCollection to SAM text and compares it with the expected
+// tab-separated string. The expected text lists the tags in name order
+// (HX, ST, VC, XY), which differs from the insertion order used below.
+TEST(SamTagCodecTest, EncodeTest)
+{
+    TagCollection collection;
+    collection["ST"] = string("foo");
+    collection["XY"] = int32_t(-42);
+    collection["HX"] = Tag("1abc75", TagModifier::HEX_STRING);
+    collection["VC"] = vector<int32_t>( { 42, -100, 37, 2048 } );
+
+    // One literal per field / separator, joined by adjacent-literal concatenation.
+    const string expected =
+        "HX:H:1abc75"
+        "\t"
+        "ST:Z:foo"
+        "\t"
+        "VC:B:i,42,-100,37,2048"
+        "\t"
+        "XY:i:-42";
+
+    EXPECT_EQ(expected, SamTagCodec::Encode(collection));
+}
+
+// Hand-assembles a raw byte stream holding three tags (HX, XY, CA), decodes it
+// with BamTagCodec::Decode and checks the decoded values. As a sanity check the
+// decoded collection is re-encoded as SAM text and compared to the expected string.
+TEST(BamTagCodecTest, DecodeTest)
+{
+ vector<uint8_t> data;
+ // HX: 2-char name, type code 'H', then the characters followed by a 0 byte
+ data.push_back(uint8_t('H'));
+ data.push_back(uint8_t('X'));
+ data.push_back(uint8_t('H'));
+ data.push_back(uint8_t('1'));
+ data.push_back(uint8_t('a'));
+ data.push_back(uint8_t('b'));
+ data.push_back(uint8_t('c'));
+ data.push_back(uint8_t('7'));
+ data.push_back(uint8_t('5'));
+ data.push_back(uint8_t(0));
+
+ // XY: 2-char name, type code 'i', then the 4 bytes of the int32_t value
+ data.push_back(uint8_t('X'));
+ data.push_back(uint8_t('Y'));
+ data.push_back(uint8_t('i'));
+ const int32_t x = -42;
+ // Bytes are copied straight from the variable's memory, i.e. in host byte order.
+ // NOTE(review): presumably this relies on a little-endian host matching the
+ // byte order the codec expects - confirm.
+ char valueBytes[sizeof x];
+ std::copy(static_cast<const char*>(static_cast<const void*>(&x)),
+ static_cast<const char*>(static_cast<const void*>(&x)) + sizeof x,
+ valueBytes);
+ data.push_back(valueBytes[0]);
+ data.push_back(valueBytes[1]);
+ data.push_back(valueBytes[2]);
+ data.push_back(valueBytes[3]);
+
+ // CA: 2-char name, type code 'B', element code 'C', a 4-byte element count
+ // (host byte order, copied the same way as above), then the elements
+ data.push_back('C');
+ data.push_back('A');
+ data.push_back('B');
+ data.push_back('C');
+ const uint32_t numChars = 3;
+ char numCharsValueBytes[sizeof numChars];
+ std::copy(static_cast<const char*>(static_cast<const void*>(&numChars)),
+ static_cast<const char*>(static_cast<const void*>(&numChars)) + sizeof numChars,
+ numCharsValueBytes);
+ data.push_back(numCharsValueBytes[0]);
+ data.push_back(numCharsValueBytes[1]);
+ data.push_back(numCharsValueBytes[2]);
+ data.push_back(numCharsValueBytes[3]);
+
+ const vector<uint8_t> charArray = vector<uint8_t>({34, 5, 125});
+ data.push_back(charArray.at(0));
+ data.push_back(charArray.at(1));
+ data.push_back(charArray.at(2));
+
+ TagCollection tags = BamTagCodec::Decode(data);
+
+ EXPECT_TRUE(tags["HX"].HasModifier(TagModifier::HEX_STRING));
+ EXPECT_EQ(string("1abc75"), tags["HX"].ToString());
+ EXPECT_EQ(x, tags["XY"].ToInt32());
+ EXPECT_EQ(charArray, tags["CA"].ToUInt8Array());
+
+ // sanity check - convert tags back to SAM
+ // (expected text lists the tags in name order: CA, HX, XY)
+ string expected;
+ expected.append("CA:B:C,34,5,125");
+ expected.append("\t");
+ expected.append("HX:H:1abc75");
+ expected.append("\t");
+ expected.append("XY:i:-42");
+
+ const string sam = SamTagCodec::Encode(tags);
+ EXPECT_EQ(expected, sam);
+}
+
+// Hand-assembles the byte stream expected for three tags (CA, HX, XY), then
+// encodes an equivalent TagCollection with BamTagCodec::Encode and compares the
+// result byte for byte. The expected bytes list the tags in name order, which
+// differs from the insertion order used when filling the collection.
+TEST(BamTagCodecTest, EncodeTest)
+{
+ vector<uint8_t> expected;
+
+ // CA: 2-char name, type code 'B', element code 'C', a 4-byte element count,
+ // then the elements
+ expected.push_back('C');
+ expected.push_back('A');
+ expected.push_back('B');
+ expected.push_back('C');
+ const uint32_t numChars = 3;
+ // Count bytes are copied straight from memory, i.e. in host byte order.
+ // NOTE(review): presumably this relies on a little-endian host matching the
+ // byte order the codec emits - confirm.
+ char numCharsValueBytes[sizeof numChars];
+ std::copy(static_cast<const char*>(static_cast<const void*>(&numChars)),
+ static_cast<const char*>(static_cast<const void*>(&numChars)) + sizeof numChars,
+ numCharsValueBytes);
+ expected.push_back(numCharsValueBytes[0]);
+ expected.push_back(numCharsValueBytes[1]);
+ expected.push_back(numCharsValueBytes[2]);
+ expected.push_back(numCharsValueBytes[3]);
+
+ const vector<uint8_t> charArray = vector<uint8_t>({34, 5, 125});
+ expected.push_back(charArray.at(0));
+ expected.push_back(charArray.at(1));
+ expected.push_back(charArray.at(2));
+
+ // HX: 2-char name, type code 'H', then the characters followed by a 0 byte
+ expected.push_back(uint8_t('H'));
+ expected.push_back(uint8_t('X'));
+ expected.push_back(uint8_t('H'));
+ expected.push_back(uint8_t('1'));
+ expected.push_back(uint8_t('a'));
+ expected.push_back(uint8_t('b'));
+ expected.push_back(uint8_t('c'));
+ expected.push_back(uint8_t('7'));
+ expected.push_back(uint8_t('5'));
+ expected.push_back(uint8_t(0));
+
+ // XY: 2-char name, type code 'i', then the 4 bytes of the int32_t value
+ // (host byte order, copied the same way as the count above)
+ expected.push_back(uint8_t('X'));
+ expected.push_back(uint8_t('Y'));
+ expected.push_back(uint8_t('i'));
+ const int32_t x = -42;
+ char valueBytes[sizeof x];
+ std::copy(static_cast<const char*>(static_cast<const void*>(&x)),
+ static_cast<const char*>(static_cast<const void*>(&x)) + sizeof x,
+ valueBytes);
+ expected.push_back(valueBytes[0]);
+ expected.push_back(valueBytes[1]);
+ expected.push_back(valueBytes[2]);
+ expected.push_back(valueBytes[3]);
+
+ // Collection under test, filled in a different order than the expected bytes
+ TagCollection tags;
+ tags["HX"] = Tag("1abc75", TagModifier::HEX_STRING);
+ tags["CA"] = charArray;
+ tags["XY"] = x;
+
+ const vector<uint8_t>& data = BamTagCodec::Encode(tags);
+ EXPECT_EQ(expected, data);
+}
+
+// Encodes five ASCII-character tags built from different char-like source
+// types and checks the resulting bytes. Tags are built both by
+// construct-then-modify and by direct construction; both must encode alike.
+TEST(BamTagCodecTest, AsciiTagsTest)
+{
+    // Each tag is 4 bytes: 2-char name, type code 'A', then the character.
+    // Tags appear in name order, not in the insertion order used below.
+    const vector<uint8_t> expected = {
+        'I', '8', 'A', 'B',  // I8:A:B
+        'P', 'C', 'A', '$',  // PC:A:$
+        'S', 'C', 'A', '$',  // SC:A:$
+        'U', '8', 'A', 'A',  // U8:A:A
+        'U', 'C', 'A', '$'   // UC:A:$
+    };
+
+    const char plainChar = '$';
+    const signed char signedChar = '$';
+    const unsigned char unsignedChar = '$';
+    const uint8_t uint8Value = 65;  // encoded as 'A' per the expected bytes
+    const int8_t int8Value = 66;    // encoded as 'B' per the expected bytes
+
+    { // old style: construct-then-modify
+
+        Tag plainCharTag(plainChar);
+        Tag signedCharTag(signedChar);
+        Tag unsignedCharTag(unsignedChar);
+        Tag uint8Tag(uint8Value);
+        Tag int8Tag(int8Value);
+
+        plainCharTag.Modifier(TagModifier::ASCII_CHAR);
+        signedCharTag.Modifier(TagModifier::ASCII_CHAR);
+        unsignedCharTag.Modifier(TagModifier::ASCII_CHAR);
+        uint8Tag.Modifier(TagModifier::ASCII_CHAR);
+        int8Tag.Modifier(TagModifier::ASCII_CHAR);
+
+        TagCollection collection;
+        collection["PC"] = plainCharTag;
+        collection["SC"] = signedCharTag;
+        collection["UC"] = unsignedCharTag;
+        collection["U8"] = uint8Tag;
+        collection["I8"] = int8Tag;
+
+        const vector<uint8_t> encoded = BamTagCodec::Encode(collection);
+        EXPECT_EQ(expected, encoded);
+    }
+
+    { // new style: construct directly as ASCII
+
+        const Tag plainCharTag(plainChar, TagModifier::ASCII_CHAR);
+        const Tag signedCharTag(signedChar, TagModifier::ASCII_CHAR);
+        const Tag unsignedCharTag(unsignedChar, TagModifier::ASCII_CHAR);
+        const Tag uint8Tag(uint8Value, TagModifier::ASCII_CHAR);
+        const Tag int8Tag(int8Value, TagModifier::ASCII_CHAR);
+
+        TagCollection collection;
+        collection["PC"] = plainCharTag;
+        collection["SC"] = signedCharTag;
+        collection["UC"] = unsignedCharTag;
+        collection["U8"] = uint8Tag;
+        collection["I8"] = int8Tag;
+
+        const vector<uint8_t> encoded = BamTagCodec::Encode(collection);
+        EXPECT_EQ(expected, encoded);
+    }
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/../../src/TimeUtils.h>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+// Checks that a fixed time point is rendered by ToIso8601() as the expected
+// timestamp text.
+TEST(TimeUtilsTest, ToIso8601)
+{
+    // whole-second input, so the expected text carries no millisecond part
+    const string wanted{ "1983-10-31T06:12:30Z" };
+
+    const time_t secondsSinceEpoch = 436428750L;
+    const auto when = std::chrono::system_clock::from_time_t(secondsSinceEpoch);
+
+    const auto rendered = internal::ToIso8601(when);
+    EXPECT_EQ(wanted, rendered);
+}
+
+// Checks that a fixed time point is rendered by ToDataSetFormat() as the
+// expected compact text.
+TEST(TimeUtilsTest, ToDataSetFormat)
+{
+    // whole-second input, so there is no millisecond part to account for
+    const string wanted{ "831031_061230" };
+
+    const time_t secondsSinceEpoch = 436428750L;
+    const auto when = std::chrono::system_clock::from_time_t(secondsSinceEpoch);
+
+    const std::string& rendered = internal::ToDataSetFormat(when);
+    EXPECT_EQ(wanted, rendered);
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+//#ifdef PBBAM_TESTING
+//#define private public
+//#endif
+
+//#include "TestData.h"
+//#include <gtest/gtest.h>
+//#include <pbbam/EntireFileQuery.h>
+
+//#include <pbbam/UnmappedReadsQuery.h>
+//#include <string>
+//using namespace PacBio;
+//using namespace PacBio::BAM;
+//using namespace std;
+
+//const string inputBamFn1 = tests::Data_Dir + "/unmap1.bam";
+//const string inputBamFn2 = tests::Data_Dir + "/unmap2.bam";
+
+//TEST(UnmappedReadsQueryTest, UnmappedOnlyFile)
+//{
+// // open input BAM file
+// BamFile bamFile(inputBamFn1);
+// EXPECT_TRUE(bamFile);
+
+// // check all records, and save unmapped count
+// int count = 0;
+// int unmappedExpected = 0;
+// EntireFileQuery entireFile(bamFile);
+// EXPECT_TRUE(entireFile);
+// for ( const BamRecord& record : entireFile ) {
+// ++count;
+// if (!record.IsMapped())
+// ++unmappedExpected;
+// }
+// EXPECT_EQ(10, count);
+// EXPECT_EQ(10, unmappedExpected);
+
+// // query unmapped records only
+// int unmappedObserved = 0;
+// UnmappedReadsQuery unmappedReads(bamFile);
+// EXPECT_TRUE(unmappedReads);
+// for ( const BamRecord& record : unmappedReads ) {
+// EXPECT_FALSE(record.IsMapped());
+// ++unmappedObserved;
+// }
+// EXPECT_EQ(unmappedExpected, unmappedObserved);
+//}
+
+//TEST(UnmappedReadsQueryTest, MixedFile)
+//{
+// // open input BAM file
+// BamFile bamFile(inputBamFn2);
+// EXPECT_TRUE(bamFile);
+
+// // check all records, and save unmapped count
+// int count = 0;
+// int unmappedExpected = 0;
+// EntireFileQuery entireFile(bamFile);
+// EXPECT_TRUE(entireFile);
+// for ( const BamRecord& record : entireFile ) {
+// ++count;
+// if (!record.IsMapped())
+// ++unmappedExpected;
+// }
+// EXPECT_EQ(19, count);
+// EXPECT_EQ(9, unmappedExpected);
+
+// // query unmapped records only
+// int unmappedObserved = 0;
+// UnmappedReadsQuery unmappedReads(bamFile);
+// EXPECT_TRUE(unmappedReads);
+// for ( const BamRecord& record : unmappedReads ) {
+// EXPECT_FALSE(record.IsMapped());
+// ++unmappedObserved;
+// }
+// EXPECT_EQ(unmappedExpected, unmappedObserved);
+//}
+
+// TODO: handle no index case
+
+// TODO: additional special cases as needed
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include <gtest/gtest.h>
+#include <pbbam/BamFile.h>
+#include <pbbam/BamHeader.h>
+#include <pbbam/BamRecord.h>
+#include <pbbam/Cigar.h>
+#include <pbbam/ReadGroupInfo.h>
+#include <pbbam/Validator.h>
+
+#include "../src/StringUtils.h"
+#include "../src/ValidationErrors.h"
+
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
+// Builds the mapped-record fixture shared by the validator tests: the core
+// record fields are set first, then the tag collection is attached.
+// The setter order is kept exactly as-is, since later calls may depend on
+// earlier ones (e.g. Flag() followed by SetMapped()).
+static BamRecord makeValidMappedRecord(void)
+{
+    BamRecordImpl core;
+    core.Bin(4680);
+    core.Flag(2);
+    core.InsertSize(0);
+    core.MapQuality(10);
+    core.MatePosition(-1);
+    core.MateReferenceId(-1);
+    core.Name("movie1/54130/0_10");
+    core.Position(1);
+    core.ReferenceId(0);
+    core.SetMapped(true);
+    core.SetSequenceAndQualities("AATGAGGAGA");
+    core.CigarData(Cigar{ "10=" });
+
+    // string-valued tags all have the same length as the 10-base sequence
+    TagCollection tagData;
+    tagData["RG"] = string{ "3f58e5b8" };
+    tagData["dq"] = string{ "2222'$22'2" };
+    tagData["dt"] = string{ "NNNNAGNNGN" };
+    tagData["iq"] = string{ "(+#1'$#*1&" };
+    tagData["mq"] = string{ "&1~51*5&~2" };
+    tagData["sq"] = string{ "<32<4<<<<3" };
+    tagData["ip"] = vector<uint8_t>{ 2,0,10,22,34,0,2,3,0,16 };
+    tagData["np"] = static_cast<int32_t>(1);
+    tagData["qe"] = static_cast<int32_t>(10);
+    tagData["qs"] = static_cast<int32_t>(0);
+    tagData["zm"] = static_cast<int32_t>(54130);
+    tagData["cx"] = static_cast<int32_t>(2);
+    tagData["AS"] = static_cast<int32_t>(-3020);
+    tagData["NM"] = static_cast<int32_t>(134);
+    tagData["rq"] = static_cast<float>(0.854);
+    tagData["sn"] = vector<float>(4, 2.0f);
+    core.Tags(tagData);
+
+    return BamRecord(core);
+}
+
+// Builds the unmapped-record fixture shared by the validator tests: position
+// and reference ID are both -1 and no CIGAR is set. The setter order is kept
+// exactly as-is.
+static BamRecord makeValidUnmappedRecord(void)
+{
+    BamRecordImpl core;
+    core.Bin(4680);
+    core.Flag(4);
+    core.InsertSize(0);
+    core.MapQuality(10);
+    core.MatePosition(-1);
+    core.MateReferenceId(-1);
+    core.Name("m140906_231018_42161_c100676332550000001823129611271486_s1_p0/8/0_10");
+    core.Position(-1);
+    core.ReferenceId(-1);
+    core.SetSequenceAndQualities("AATGAGGAGA");
+
+    // string-valued tags all have the same length as the 10-base sequence
+    TagCollection tagData;
+    tagData["RG"] = string{ "b5482b33" };
+    tagData["dq"] = string{ "2222222222" };
+    tagData["dt"] = string{ "NNNNNNNNNN" };
+    tagData["iq"] = string{ ",*11111001" };
+    tagData["mq"] = string{ "&47088')34" };
+    tagData["sq"] = string{ "8<4<:<6<0<" };
+    tagData["ip"] = vector<uint8_t>{ 255,9,20,43,38,12,9,30,39,22 };
+    tagData["np"] = static_cast<int32_t>(1);
+    tagData["qe"] = static_cast<int32_t>(10);
+    tagData["qs"] = static_cast<int32_t>(0);
+    tagData["zm"] = static_cast<int32_t>(8);
+    tagData["cx"] = static_cast<int32_t>(2);
+    tagData["AS"] = static_cast<int32_t>(-3020);
+    tagData["NM"] = static_cast<int32_t>(134);
+    tagData["rq"] = static_cast<float>(0.811);
+    tagData["sn"] = vector<float>(4, 2.0f);
+    core.Tags(tagData);
+
+    return BamRecord(core);
+}
+
+// Builds the read-group fixture that the read-group validator tests copy
+// and then break one field at a time.
+static ReadGroupInfo makeValidReadGroup(void)
+{
+    ReadGroupInfo group("f5b4ffb6");
+    group.MovieName("movie32");
+    group.ReadType("CCS");
+    group.BindingKit("100372700");
+    group.SequencingKit("100612400");
+    group.BasecallerVersion("2.3");
+    group.FrameRateHz("100");
+    group.Control("TRUE");
+    return group;
+}
+
+// Known-good fixtures, built once at static-initialization time. Tests copy
+// these and modify the copy.
+static const ReadGroupInfo validReadGroup((makeValidReadGroup()));
+static const BamRecord validMappedRecord((makeValidMappedRecord()));
+static const BamRecord validUnmappedRecord((makeValidUnmappedRecord()));
+
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
+TEST(ValidatorErrorsTest, SetMaxNumErrors)
+{
+ { // default - use "no max"
+ internal::ValidationErrors errors;
+ EXPECT_EQ(internal::ValidationErrors::MAX, errors.maxNumErrors_);
+ }
+ { // max of zero doesn't make sense... make equivalent to "no max"
+ internal::ValidationErrors errors(0);
+ EXPECT_EQ(internal::ValidationErrors::MAX, errors.maxNumErrors_);
+ }
+ { // max = 1
+ internal::ValidationErrors errors(1);
+ EXPECT_EQ(1, errors.maxNumErrors_);
+ }
+ { // max = 10
+ internal::ValidationErrors errors(10);
+ EXPECT_EQ(10, errors.maxNumErrors_);
+ }
+}
+
+TEST(ValidatorErrorsTest, ThrowOnMaxReached)
+{
+ {
+ internal::ValidationErrors errors(1);
+ EXPECT_THROW(errors.AddFileError("foo", "you"), ValidationException);
+ }
+ {
+ internal::ValidationErrors errors(2);
+ errors.AddFileError("foo", "you");
+ EXPECT_THROW(errors.AddFileError("foo", "me"), ValidationException);
+ }
+}
+
+// Fills a ValidationErrors object up to its limit and checks that the
+// resulting ValidationException carries every error that was added, grouped
+// by file, read group and record.
+TEST(ValidatorErrorsTest, ExceptionFromResults)
+{
+    const string error1 = "error1";
+    const string error2 = "error2";
+
+    try {
+
+        // limit of 4, and exactly 4 errors are added below
+        internal::ValidationErrors errors(4);
+        errors.AddFileError("path/to/foo.bam", error1);
+        errors.AddFileError("path/to/foo.bam", error2);
+        errors.AddReadGroupError("deadbeef", "invalid sequencing chemistry combination detected");
+        errors.AddRecordError("m140906_231018_42161_c100676332550000001823129611271486_s1_p0/8/0_10",
+                              "MergeQV does not match expected length");
+
+        // The 4th error reaches the limit and must throw. Without this
+        // guard the test would pass without checking anything if no
+        // exception were raised.
+        FAIL() << "expected ValidationException after reaching the max error count";
+
+    } catch (ValidationException& e) {
+
+        EXPECT_EQ(1, e.FileErrors().size());                       // only 1 file
+        EXPECT_EQ(2, e.FileErrors().at("path/to/foo.bam").size()); // 2 errors for this file
+        EXPECT_EQ(1, e.ReadGroupErrors().size());
+        EXPECT_EQ(1, e.RecordErrors().size());
+    }
+}
+
+// The unmodified read-group fixture must validate without throwing.
+TEST(ValidatorTest, ValidReadGroup)
+{
+    const ReadGroupInfo& group = tests::validReadGroup;
+    ASSERT_NO_THROW(Validator::Validate(group));
+}
+
+// Blanking out any one of these read-group fields must be rejected by both
+// Validate() (throws) and IsValid() (returns false).
+TEST(ValidatorTest, ReadGroupRequiredComponents)
+{
+    { // ID left empty
+        ReadGroupInfo noId(tests::validReadGroup);
+        noId.Id("");
+        EXPECT_THROW(Validator::Validate(noId), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(noId));
+    }
+    { // movie name left empty
+        ReadGroupInfo noMovie(tests::validReadGroup);
+        noMovie.MovieName("");
+        EXPECT_THROW(Validator::Validate(noMovie), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(noMovie));
+    }
+    { // read type left empty
+        ReadGroupInfo noReadType(tests::validReadGroup);
+        noReadType.ReadType("");
+        EXPECT_THROW(Validator::Validate(noReadType), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(noReadType));
+    }
+    { // binding kit left empty
+        ReadGroupInfo noBindingKit(tests::validReadGroup);
+        noBindingKit.BindingKit("");
+        EXPECT_THROW(Validator::Validate(noBindingKit), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(noBindingKit));
+    }
+    { // sequencing kit left empty
+        ReadGroupInfo noSequencingKit(tests::validReadGroup);
+        noSequencingKit.SequencingKit("");
+        EXPECT_THROW(Validator::Validate(noSequencingKit), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(noSequencingKit));
+    }
+    { // basecaller version left empty
+        ReadGroupInfo noBasecaller(tests::validReadGroup);
+        noBasecaller.BasecallerVersion("");
+        EXPECT_THROW(Validator::Validate(noBasecaller), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(noBasecaller));
+    }
+    { // frame rate left empty
+        ReadGroupInfo noFrameRate(tests::validReadGroup);
+        noFrameRate.FrameRateHz("");
+        EXPECT_THROW(Validator::Validate(noFrameRate), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(noFrameRate));
+    }
+}
+
+// Read groups whose fields are present but hold unacceptable values must be
+// rejected by both Validate() (throws) and IsValid() (returns false).
+TEST(ValidatorTest, ReadGroupValues)
+{
+    { // stored ID no longer matches the expected ID - ID changed
+        ReadGroupInfo wrongId(tests::validReadGroup);
+        wrongId.Id("deadbeef");
+        EXPECT_THROW(Validator::Validate(wrongId), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(wrongId));
+    }
+    { // stored ID no longer matches the expected ID - read type changed
+        ReadGroupInfo otherReadType(tests::validReadGroup);
+        otherReadType.ReadType("SUBREAD");
+        EXPECT_THROW(Validator::Validate(otherReadType), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(otherReadType));
+    }
+    { // stored ID no longer matches the expected ID - movie name changed
+        ReadGroupInfo otherMovie(tests::validReadGroup);
+        otherMovie.MovieName("foo");
+        EXPECT_THROW(Validator::Validate(otherMovie), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(otherMovie));
+    }
+    { // read type that is not a known one
+        ReadGroupInfo unknownReadType(tests::validReadGroup);
+        unknownReadType.ReadType("FOO");
+
+        // regenerate the ID from the new values, so that only the read type
+        // itself is under test and not an ID mismatch
+        const auto recomputedId = MakeReadGroupId(unknownReadType.MovieName(),
+                                                  unknownReadType.ReadType());
+        unknownReadType.Id(recomputedId);
+
+        EXPECT_THROW(Validator::Validate(unknownReadType), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(unknownReadType));
+    }
+    { // chemistry triple broken via the binding kit
+        ReadGroupInfo badBindingKit(tests::validReadGroup);
+        badBindingKit.BindingKit("foo");
+        EXPECT_THROW(Validator::Validate(badBindingKit), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(badBindingKit));
+    }
+    { // chemistry triple broken via the sequencing kit
+        ReadGroupInfo badSequencingKit(tests::validReadGroup);
+        badSequencingKit.SequencingKit("foo");
+        EXPECT_THROW(Validator::Validate(badSequencingKit), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(badSequencingKit));
+    }
+    { // chemistry triple broken via the basecaller version
+        ReadGroupInfo badBasecaller(tests::validReadGroup);
+        badBasecaller.BasecallerVersion("0.42");
+        EXPECT_THROW(Validator::Validate(badBasecaller), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(badBasecaller));
+    }
+    { // frame rate that is not a number
+        ReadGroupInfo badFrameRate(tests::validReadGroup);
+        badFrameRate.FrameRateHz("foo");
+        EXPECT_THROW(Validator::Validate(badFrameRate), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(badFrameRate));
+    }
+}
+
+// Both a coordinate-sorted header with a reference sequence and an
+// unknown-sort-order header without one must validate without throwing.
+TEST(ValidatorTest, ValidHeader)
+{
+    const BamHeader mappedHeader {
+        "@HD\tVN:1.5\tSO:coordinate\tpb:3.0.1\n"
+        "@SQ\tSN:ecoliK12_pbi_March2013_2955000_to_2980000\tLN:25000\tM5:734d5f3b2859595f4bd87a2fe6b7389b\n"
+        "@RG\tID:3f58e5b8\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;"
+        "MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BASECALLERVERSION=2.1;"
+        "FRAMERATEHZ=75.000000;BINDINGKIT=100356300;SEQUENCINGKIT=100356200"
+        "\tPU:movie1\n"
+    };
+
+    const BamHeader unmappedHeader {
+        "@HD\tVN:1.5\tSO:unknown\tpb:3.0.1\n"
+        "@RG\tID:b5482b33\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
+        "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
+        "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.1;FRAMERATEHZ=75.000000\t"
+        "PU:m140906_231018_42161_c100676332550000001823129611271486_s1_p0\n"
+    };
+
+    ASSERT_NO_THROW(Validator::Validate(mappedHeader));
+    ASSERT_NO_THROW(Validator::Validate(unmappedHeader));
+}
+
+// Starts from a known-good header and breaks one field per case; every
+// broken copy must be rejected by both Validate() and IsValid().
+TEST(ValidatorTest, ValidateHeader)
+{
+    const BamHeader goodHeader {
+        "@HD\tVN:1.5\tSO:coordinate\tpb:3.0.1\n"
+        "@SQ\tSN:ecoliK12_pbi_March2013_2955000_to_2980000\tLN:25000\tM5:734d5f3b2859595f4bd87a2fe6b7389b\n"
+        "@RG\tID:3f58e5b8\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;"
+        "MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BASECALLERVERSION=2.1;"
+        "FRAMERATEHZ=75.000000;BINDINGKIT=100356300;SEQUENCINGKIT=100356200"
+        "\tPU:movie1\n"
+    };
+
+    { // SAM version that is not numeric
+        BamHeader broken(goodHeader.DeepCopy());
+        broken.Version("foo");
+        EXPECT_THROW(Validator::Validate(broken), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(broken));
+    }
+    { // SAM version with a negative component
+        BamHeader broken(goodHeader.DeepCopy());
+        broken.Version("-1.4.0");
+        EXPECT_THROW(Validator::Validate(broken), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(broken));
+    }
+    { // sort order that is not a recognized value
+        BamHeader broken(goodHeader.DeepCopy());
+        broken.SortOrder("not_a_valid_sort_order");
+        EXPECT_THROW(Validator::Validate(broken), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(broken));
+    }
+
+    // The PacBioBamVersion setter itself already throws for bad values
+    // (non-numeric, negative, earlier than the minimum). That first exception
+    // is deliberately caught and ignored so the validator can be exercised
+    // on the resulting header.
+
+    { // PacBioBAM version that is not numeric
+        BamHeader broken(goodHeader.DeepCopy());
+        try {
+            broken.PacBioBamVersion("foo");
+        } catch (...) { }
+        EXPECT_THROW(Validator::Validate(broken), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(broken));
+    }
+    { // PacBioBAM version with a negative component
+        BamHeader broken(goodHeader.DeepCopy());
+        try {
+            broken.PacBioBamVersion("-1.4.0");
+        } catch (...) { }
+        EXPECT_THROW(Validator::Validate(broken), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(broken));
+    }
+    { // PacBioBAM version earlier than the minimum allowed
+        BamHeader broken(goodHeader.DeepCopy());
+        try {
+            broken.PacBioBamVersion("3.0.0");
+        } catch (...) { }
+        EXPECT_THROW(Validator::Validate(broken), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(broken));
+    }
+}
+
+// The mapped-record fixture, paired with a header that declares its read
+// group and reference, must validate without throwing.
+TEST(ValidatorTest, ValidRecord)
+{
+    const BamHeader matchingHeader {
+        "@HD\tVN:1.5\tSO:coordinate\tpb:3.0.1\n"
+        "@SQ\tSN:ecoliK12_pbi_March2013_2955000_to_2980000\tLN:25000\tM5:734d5f3b2859595f4bd87a2fe6b7389b\n"
+        "@RG\tID:3f58e5b8\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;"
+        "MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BASECALLERVERSION=2.1;"
+        "FRAMERATEHZ=75.000000;BINDINGKIT=100356300;SEQUENCINGKIT=100356200"
+        "\tPU:movie1\n"
+    };
+
+    // work on a copy so the shared fixture stays untouched
+    BamRecord candidate(tests::validMappedRecord);
+    candidate.header_ = matchingHeader;
+
+    ASSERT_NO_THROW(Validator::Validate(candidate));
+}
+
+// Sets tag `tagName` on `record` to `tag`: adds it when the record does not
+// have that tag yet, otherwise overwrites the existing value.
+static inline
+void ModifyTag(BamRecord* record,
+               const std::string& tagName,
+               const Tag& tag)
+{
+    if (!record->Impl().HasTag(tagName)) {
+        record->Impl().AddTag(tagName, tag);
+        return;
+    }
+    record->Impl().EditTag(tagName, tag);
+}
+
+// Copies the unmapped-record fixture, replaces (or adds) tag `tagName` with
+// `tag`, and expects the validator to reject the result via both Validate()
+// and IsValid().
+static inline
+void CheckInvalidTagLength(const std::string& tagName, const Tag& tag)
+{
+    // built once and reused across calls
+    static const BamHeader unmappedHeader {
+        "@HD\tVN:1.5\tSO:unknown\tpb:3.0.1\n"
+        "@RG\tID:b5482b33\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
+        "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
+        "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.1;FRAMERATEHZ=75.000000\t"
+        "PU:m140906_231018_42161_c100676332550000001823129611271486_s1_p0\n"
+    };
+
+    BamRecord candidate(tests::validUnmappedRecord);
+    candidate.header_ = unmappedHeader;
+    ModifyTag(&candidate, tagName, tag);
+
+    EXPECT_THROW(Validator::Validate(candidate), ValidationException);
+    EXPECT_FALSE(Validator::IsValid(candidate));
+}
+
+// Shrinks the "variable-length" SEQ and per-base tags so they are too short
+// for the read's stated queryStart/queryEnd; each case must be rejected.
+TEST(ValidatorTest, TagDataLengths)
+{
+    const BamHeader unmappedHeader {
+        "@HD\tVN:1.5\tSO:unknown\tpb:3.0.1\n"
+        "@RG\tID:b5482b33\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
+        "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
+        "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.1;FRAMERATEHZ=75.000000\t"
+        "PU:m140906_231018_42161_c100676332550000001823129611271486_s1_p0\n"
+    };
+
+    { // SEQ cut down to two bases
+        BamRecord candidate(tests::validUnmappedRecord);
+        candidate.header_ = unmappedHeader;
+        candidate.Impl().SetSequenceAndQualities("AA");
+        EXPECT_THROW(Validator::Validate(candidate), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(candidate));
+    }
+
+    // two-element quality strings
+    CheckInvalidTagLength("dq", QualityValues("@@").Fastq());   // DeletionQV
+    CheckInvalidTagLength("iq", QualityValues("@@").Fastq());   // InsertionQV
+    CheckInvalidTagLength("mq", QualityValues("@@").Fastq());   // MergeQV
+    CheckInvalidTagLength("sq", QualityValues("@@").Fastq());   // SubstitutionQV
+
+    // two-character base strings
+    CheckInvalidTagLength("dt", string("AA"));                  // DeletionTag
+    CheckInvalidTagLength("st", string("AA"));                  // SubstitutionTag
+
+    // three-element frame data
+    const Frames shortFrames{ vector<uint16_t>{42, 42, 42} };
+    const auto& frames = shortFrames.Data();
+    CheckInvalidTagLength("ip", frames);                        // IPD
+
+    // NOTE: disabling "internal" tag checks for now, only checking "standard"
+    // PacBioBAM tags
+
+//    const auto& pulses = vector<uint16_t>{42, 42, 42};
+//    CheckInvalidTagLength("pv", QualityValues("@@").Fastq()); // AltLabelQV
+//    CheckInvalidTagLength("pq", QualityValues("@@").Fastq()); // LabelQV
+//    CheckInvalidTagLength("pg", QualityValues("@@").Fastq()); // PulseMergeQv
+//    CheckInvalidTagLength("pt", string("AA")); // AltLabelTag
+//    CheckInvalidTagLength("pc", string("AA")); // PulseCall
+//    CheckInvalidTagLength("pd", frames); // PrePulseFrames
+//    CheckInvalidTagLength("px", frames); // PulseCallWidth
+//    CheckInvalidTagLength("pw", frames); // PulseWidth
+//    CheckInvalidTagLength("pa", pulses); // Pkmean
+//    CheckInvalidTagLength("ps", pulses); // Pkmean2
+//    CheckInvalidTagLength("pm", pulses); // Pkmid
+//    CheckInvalidTagLength("pi", pulses); // Pkmid2
+}
+
+// Removes or corrupts one tag per case on a copy of the mapped-record
+// fixture; every case must be rejected by both Validate() and IsValid().
+TEST(ValidatorTest, TagDataValues)
+{
+    const BamHeader matchingHeader {
+        "@HD\tVN:1.5\tSO:coordinate\tpb:3.0.1\n"
+        "@SQ\tSN:ecoliK12_pbi_March2013_2955000_to_2980000\tLN:25000\tM5:734d5f3b2859595f4bd87a2fe6b7389b\n"
+        "@RG\tID:3f58e5b8\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;"
+        "MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BASECALLERVERSION=2.1;"
+        "FRAMERATEHZ=75.000000;BINDINGKIT=100356300;SEQUENCINGKIT=100356200"
+        "\tPU:movie1\n"
+    };
+
+    { // qe tag removed
+        BamRecord candidate(tests::validMappedRecord);
+        candidate.header_ = matchingHeader;
+        candidate.Impl().RemoveTag("qe");
+        EXPECT_THROW(Validator::Validate(candidate), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(candidate));
+    }
+    { // qs tag removed
+        BamRecord candidate(tests::validMappedRecord);
+        candidate.header_ = matchingHeader;
+        candidate.Impl().RemoveTag("qs");
+        EXPECT_THROW(Validator::Validate(candidate), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(candidate));
+    }
+    { // queryStart must be < queryEnd
+        BamRecord candidate(tests::validMappedRecord);
+        candidate.header_ = matchingHeader;
+        candidate.QueryStart(10);
+        EXPECT_THROW(Validator::Validate(candidate), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(candidate));
+    }
+    { // zm tag removed
+        BamRecord candidate(tests::validMappedRecord);
+        candidate.header_ = matchingHeader;
+        candidate.Impl().RemoveTag("zm");
+        EXPECT_THROW(Validator::Validate(candidate), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(candidate));
+    }
+    { // np tag removed
+        BamRecord candidate(tests::validMappedRecord);
+        candidate.header_ = matchingHeader;
+        candidate.Impl().RemoveTag("np");
+        EXPECT_THROW(Validator::Validate(candidate), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(candidate));
+    }
+    { // SUBREAD-type records are expected to have numPasses == 1
+        BamRecord candidate(tests::validMappedRecord);
+        candidate.header_ = matchingHeader;
+        candidate.NumPasses(42);
+        EXPECT_THROW(Validator::Validate(candidate), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(candidate));
+    }
+    { // sn tag removed
+        BamRecord candidate(tests::validMappedRecord);
+        candidate.header_ = matchingHeader;
+        candidate.Impl().RemoveTag("sn");
+        EXPECT_THROW(Validator::Validate(candidate), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(candidate));
+    }
+}
+
+// A mapped record with an invalid reference ID or position must be rejected
+// by both Validate() (throws) and IsValid() (returns false).
+TEST(ValidatorTest, MappedRecords)
+{
+    // This header matches the mapped-record fixture: it declares read group
+    // 3f58e5b8 (the fixture's RG tag) and a reference sequence for refID 0.
+    // It is the same header the ValidRecord test uses to show the unmodified
+    // fixture is valid. With a non-matching header the record could be
+    // rejected for a reason other than the refID/position change under test.
+    const BamHeader validMappedHeader {
+        "@HD\tVN:1.5\tSO:coordinate\tpb:3.0.1\n"
+        "@SQ\tSN:ecoliK12_pbi_March2013_2955000_to_2980000\tLN:25000\tM5:734d5f3b2859595f4bd87a2fe6b7389b\n"
+        "@RG\tID:3f58e5b8\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;InsertionQV=iq;"
+        "MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BASECALLERVERSION=2.1;"
+        "FRAMERATEHZ=75.000000;BINDINGKIT=100356300;SEQUENCINGKIT=100356200"
+        "\tPU:movie1\n"
+    };
+
+    { // mapped record should have valid refID
+        BamRecord record(tests::validMappedRecord);
+        record.header_ = validMappedHeader;
+        record.Impl().ReferenceId(-1);
+
+        EXPECT_THROW(Validator::Validate(record), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(record));
+    }
+    { // mapped record should have valid position
+        BamRecord record(tests::validMappedRecord);
+        record.header_ = validMappedHeader;
+        record.Impl().Position(-1);
+
+        EXPECT_THROW(Validator::Validate(record), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(record));
+    }
+
+}
+
+// An unmapped record that nevertheless carries a reference ID or a position
+// must be rejected by both Validate() (throws) and IsValid() (returns false).
+TEST(ValidatorTest, UnmappedRecords)
+{
+    const BamHeader unmappedHeader {
+        "@HD\tVN:1.5\tSO:unknown\tpb:3.0.1\n"
+        "@RG\tID:b5482b33\tPL:PACBIO\tDS:READTYPE=SUBREAD;DeletionQV=dq;DeletionTag=dt;"
+        "InsertionQV=iq;MergeQV=mq;SubstitutionQV=sq;Ipd:CodecV1=ip;BINDINGKIT=100356300;"
+        "SEQUENCINGKIT=100356200;BASECALLERVERSION=2.1;FRAMERATEHZ=75.000000\t"
+        "PU:m140906_231018_42161_c100676332550000001823129611271486_s1_p0\n"
+    };
+
+    { // reference ID set on an unmapped record
+        BamRecord candidate(tests::validUnmappedRecord);
+        candidate.header_ = unmappedHeader;
+        candidate.Impl().ReferenceId(0);
+
+        EXPECT_THROW(Validator::Validate(candidate), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(candidate));
+    }
+    { // position set on an unmapped record
+        BamRecord candidate(tests::validUnmappedRecord);
+        candidate.header_ = unmappedHeader;
+        candidate.Impl().Position(42);
+
+        EXPECT_THROW(Validator::Validate(candidate), ValidationException);
+        EXPECT_FALSE(Validator::IsValid(candidate));
+    }
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "../src/Version.h"
+
+#include <gtest/gtest.h>
+#include <sstream>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace PacBio::BAM::internal;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
+// Returns a Version built from the three given components as a temporary,
+// which the move-constructor test uses as its source object.
+static inline Version MakeVersion(int x, int y, int z)
+{
+    return Version(x, y, z);
+}
+
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
+// A default-constructed Version reports 0 for all three components.
+TEST(VersionTest, DefaultOk)
+{
+    Version fresh;
+    EXPECT_EQ(0, fresh.Major());
+    EXPECT_EQ(0, fresh.Minor());
+    EXPECT_EQ(0, fresh.Revision());
+}
+
+// Copy/move construction and assignment must all carry the three version
+// components over to the destination object.
+TEST(VersionTest, CopyAndMoveOk)
+{
+    { // copy construction
+        Version source(3,1,1);
+        EXPECT_EQ(3, source.Major());
+        EXPECT_EQ(1, source.Minor());
+        EXPECT_EQ(1, source.Revision());
+
+        Version copied(source);
+        EXPECT_EQ(3, copied.Major());
+        EXPECT_EQ(1, copied.Minor());
+        EXPECT_EQ(1, copied.Revision());
+    }
+    { // copy assignment
+        Version source(3,1,1);
+        EXPECT_EQ(3, source.Major());
+        EXPECT_EQ(1, source.Minor());
+        EXPECT_EQ(1, source.Revision());
+
+        Version target;
+        target = source;
+        EXPECT_EQ(3, target.Major());
+        EXPECT_EQ(1, target.Minor());
+        EXPECT_EQ(1, target.Revision());
+
+    }
+    { // move construction, from the temporary returned by the helper
+        Version moved(tests::MakeVersion(3,1,1));
+        EXPECT_EQ(3, moved.Major());
+        EXPECT_EQ(1, moved.Minor());
+        EXPECT_EQ(1, moved.Revision());
+
+    }
+    { // move assignment
+        Version source(3,1,1);
+        EXPECT_EQ(3, source.Major());
+        EXPECT_EQ(1, source.Minor());
+        EXPECT_EQ(1, source.Revision());
+
+        Version target;
+        target = std::move(source);
+        EXPECT_EQ(3, target.Major());
+        EXPECT_EQ(1, target.Minor());
+        EXPECT_EQ(1, target.Revision());
+    }
+}
+
+// Construction from three integers: valid values are stored as given, and a
+// negative component throws std::runtime_error.
+TEST(VersionTest, FromIntsOk)
+{
+    { // ordinary, non-negative components
+        Version fromInts(3,1,1);
+        EXPECT_EQ(3, fromInts.Major());
+        EXPECT_EQ(1, fromInts.Minor());
+        EXPECT_EQ(1, fromInts.Revision());
+    }
+
+    // a negative component is rejected
+    EXPECT_THROW(Version(-3, 1, 1), std::runtime_error);
+}
+
+// Construction from a dotted string: a well-formed string is parsed into its
+// three components, and malformed input throws std::runtime_error.
+TEST(VersionTest, FromStringOk)
+{
+    { // well-formed "major.minor.revision"
+        Version fromText("3.1.1");
+        EXPECT_EQ(3, fromText.Major());
+        EXPECT_EQ(1, fromText.Minor());
+        EXPECT_EQ(1, fromText.Revision());
+    }
+
+    // a negative component is rejected
+    EXPECT_THROW(Version("-3.1.1"), std::runtime_error);
+
+    // non-numeric components are rejected
+    EXPECT_THROW(Version("foo.bar.baz"), std::runtime_error);
+
+    // an empty string is rejected
+    EXPECT_THROW(Version(""), std::runtime_error);
+}
+
+// Each setter changes only its own component, and a negative value passed to
+// any setter throws std::runtime_error.
+TEST(VersionTest, SettersOk)
+{
+    Version version(3,1,1);
+
+    // change major only
+    version.Major(4);
+    EXPECT_EQ(4, version.Major());
+    EXPECT_EQ(1, version.Minor());
+    EXPECT_EQ(1, version.Revision());
+
+    // change minor only
+    version.Minor(7);
+    EXPECT_EQ(4, version.Major());
+    EXPECT_EQ(7, version.Minor());
+    EXPECT_EQ(1, version.Revision());
+
+    // change revision only
+    version.Revision(23);
+    EXPECT_EQ(4, version.Major());
+    EXPECT_EQ(7, version.Minor());
+    EXPECT_EQ(23, version.Revision());
+
+    { // negative values: a separate object per setter
+        Version badMajor(3,1,1);
+        Version badMinor(3,1,1);
+        Version badRevision(3,1,1);
+        EXPECT_THROW(badMajor.Major(-1), std::runtime_error);
+        EXPECT_THROW(badMinor.Minor(-1), std::runtime_error);
+        EXPECT_THROW(badRevision.Revision(-1), std::runtime_error);
+    }
+}
+
+// Exercises all six relational operators on Version. Fixtures are named after
+// their components (vMAJOR_MINOR_REVISION) so each assertion reads literally.
+TEST(VersionTest, ComparisonsOk)
+{
+    const Version v0_0_0(0, 0, 0);
+    const Version v0_0_4(0, 0, 4);
+    const Version v0_1_0(0, 1, 0);
+    const Version v0_1_4(0, 1, 4);
+    const Version v3_0_0(3, 0, 0);
+    const Version v3_0_4(3, 0, 4);
+    const Version v3_1_0(3, 1, 0);
+    const Version v3_1_4(3, 1, 4);
+    const Version v3_1_5(3, 1, 5);
+
+    // ---- operator== ----
+    // a version equals itself
+    EXPECT_TRUE(v0_0_0 == v0_0_0);
+    EXPECT_TRUE(v3_0_0 == v3_0_0);
+    EXPECT_TRUE(v0_1_0 == v0_1_0);
+    EXPECT_TRUE(v0_0_4 == v0_0_4);
+    EXPECT_TRUE(v3_1_0 == v3_1_0);
+    EXPECT_TRUE(v3_1_4 == v3_1_4);
+
+    // any differing component breaks equality
+    EXPECT_FALSE(v3_1_4 == v0_0_0);
+    EXPECT_FALSE(v3_1_4 == v3_0_0);
+    EXPECT_FALSE(v3_1_4 == v0_1_0);
+    EXPECT_FALSE(v3_1_4 == v0_0_4);
+    EXPECT_FALSE(v3_1_4 == v3_1_0);
+    EXPECT_FALSE(v3_1_4 == v3_1_5);
+
+    // ---- operator!= ----
+    // mirror image of the operator== cases above
+    EXPECT_FALSE(v0_0_0 != v0_0_0);
+    EXPECT_FALSE(v3_0_0 != v3_0_0);
+    EXPECT_FALSE(v0_1_0 != v0_1_0);
+    EXPECT_FALSE(v0_0_4 != v0_0_4);
+    EXPECT_FALSE(v3_1_0 != v3_1_0);
+    EXPECT_FALSE(v3_1_4 != v3_1_4);
+
+    EXPECT_TRUE(v3_1_4 != v0_0_0);
+    EXPECT_TRUE(v3_1_4 != v3_0_0);
+    EXPECT_TRUE(v3_1_4 != v0_1_0);
+    EXPECT_TRUE(v3_1_4 != v0_0_4);
+    EXPECT_TRUE(v3_1_4 != v3_1_0);
+    EXPECT_TRUE(v3_1_4 != v3_1_5);
+
+    // ---- operator< ----
+    // 0.0.0 against itself and every other fixture
+    EXPECT_FALSE(v0_0_0 < v0_0_0);
+    EXPECT_TRUE(v0_0_0 < v0_0_4);
+    EXPECT_TRUE(v0_0_0 < v0_1_0);
+    EXPECT_TRUE(v0_0_0 < v3_0_0);
+    EXPECT_TRUE(v0_0_0 < v0_1_4);
+    EXPECT_TRUE(v0_0_0 < v3_0_4);
+    EXPECT_TRUE(v0_0_0 < v3_1_0);
+    EXPECT_TRUE(v0_0_0 < v3_1_4);
+
+    // every fixture against 3.1.4
+    EXPECT_TRUE(v0_0_4 < v3_1_4);
+    EXPECT_TRUE(v0_1_0 < v3_1_4);
+    EXPECT_TRUE(v0_1_4 < v3_1_4);
+    EXPECT_TRUE(v3_0_0 < v3_1_4);
+    EXPECT_TRUE(v3_0_4 < v3_1_4);
+    EXPECT_TRUE(v3_1_0 < v3_1_4);
+    EXPECT_FALSE(v3_1_4 < v3_1_4);
+    EXPECT_FALSE(v3_1_5 < v3_1_4);
+
+    // reversed operands
+    EXPECT_FALSE(v3_1_4 < v0_0_0);
+
+    // ---- operator<= ----
+    EXPECT_TRUE(v0_0_0 <= v0_0_0);
+    EXPECT_TRUE(v0_0_0 <= v0_0_4);
+    EXPECT_TRUE(v0_0_0 <= v0_1_0);
+    EXPECT_TRUE(v0_0_0 <= v3_0_0);
+    EXPECT_TRUE(v0_0_0 <= v0_1_4);
+    EXPECT_TRUE(v0_0_0 <= v3_0_4);
+    EXPECT_TRUE(v0_0_0 <= v3_1_0);
+    EXPECT_TRUE(v0_0_0 <= v3_1_4);
+
+    EXPECT_TRUE(v0_0_4 <= v3_1_4);
+    EXPECT_TRUE(v0_1_0 <= v3_1_4);
+    EXPECT_TRUE(v0_1_4 <= v3_1_4);
+    EXPECT_TRUE(v3_0_0 <= v3_1_4);
+    EXPECT_TRUE(v3_0_4 <= v3_1_4);
+    EXPECT_TRUE(v3_1_0 <= v3_1_4);
+    EXPECT_TRUE(v3_1_4 <= v3_1_4);
+    EXPECT_FALSE(v3_1_5 <= v3_1_4);
+
+    EXPECT_FALSE(v3_1_4 <= v0_0_0);
+
+    // ---- operator> ----
+    EXPECT_FALSE(v0_0_0 > v0_0_0);
+    EXPECT_FALSE(v0_0_0 > v0_0_4);
+    EXPECT_FALSE(v0_0_0 > v0_1_0);
+    EXPECT_FALSE(v0_0_0 > v3_0_0);
+    EXPECT_FALSE(v0_0_0 > v0_1_4);
+    EXPECT_FALSE(v0_0_0 > v3_0_4);
+    EXPECT_FALSE(v0_0_0 > v3_1_0);
+    EXPECT_FALSE(v0_0_0 > v3_1_4);
+
+    EXPECT_FALSE(v0_0_4 > v3_1_4);
+    EXPECT_FALSE(v0_1_0 > v3_1_4);
+    EXPECT_FALSE(v0_1_4 > v3_1_4);
+    EXPECT_FALSE(v3_0_0 > v3_1_4);
+    EXPECT_FALSE(v3_0_4 > v3_1_4);
+    EXPECT_FALSE(v3_1_0 > v3_1_4);
+    EXPECT_FALSE(v3_1_4 > v3_1_4);
+    EXPECT_TRUE(v3_1_5 > v3_1_4);
+
+    EXPECT_TRUE(v3_1_4 > v0_0_0);
+
+    // ---- operator>= ----
+    EXPECT_TRUE(v0_0_0 >= v0_0_0);
+    EXPECT_FALSE(v0_0_0 >= v0_0_4);
+    EXPECT_FALSE(v0_0_0 >= v0_1_0);
+    EXPECT_FALSE(v0_0_0 >= v3_0_0);
+    EXPECT_FALSE(v0_0_0 >= v0_1_4);
+    EXPECT_FALSE(v0_0_0 >= v3_0_4);
+    EXPECT_FALSE(v0_0_0 >= v3_1_0);
+    EXPECT_FALSE(v0_0_0 >= v3_1_4);
+
+    EXPECT_FALSE(v0_0_4 >= v3_1_4);
+    EXPECT_FALSE(v0_1_0 >= v3_1_4);
+    EXPECT_FALSE(v0_1_4 >= v3_1_4);
+    EXPECT_FALSE(v3_0_0 >= v3_1_4);
+    EXPECT_FALSE(v3_0_4 >= v3_1_4);
+    EXPECT_FALSE(v3_1_0 >= v3_1_4);
+    EXPECT_TRUE(v3_1_4 >= v3_1_4);
+    EXPECT_TRUE(v3_1_5 >= v3_1_4);
+
+    EXPECT_TRUE(v3_1_4 >= v0_0_0);
+}
+
+// ToString() renders a version as "major.minor.revision".
+TEST(VersionTest, ToStringOk)
+{
+    // all-zero version
+    {
+        Version zero(0, 0, 0);
+        EXPECT_EQ(string("0.0.0"), zero.ToString());
+    }
+    // built from integers
+    {
+        Version fromInts(3, 1, 4);
+        EXPECT_EQ(string("3.1.4"), fromInts.ToString());
+    }
+    // default-constructed, then only the major component is set
+    {
+        Version modified;
+        modified.Major(4);
+        EXPECT_EQ(string("4.0.0"), modified.ToString());
+    }
+    // built from a string: ToString() must return that same string
+    {
+        const string text = "1.2.3";
+        Version fromText(text);
+        EXPECT_EQ(text, fromText.ToString());
+    }
+}
+
+// Streaming a Version with operator<< writes its dotted form, and several
+// versions can be chained in one expression.
+TEST(VersionTest, OutputStreamOk)
+{
+    Version first(3, 1, 4);
+    Version second(4, 10, 0);
+
+    stringstream out;
+    out << first << ", " << second << ", " << first << '\n';
+
+    EXPECT_EQ(string("3.1.4, 4.10.0, 3.1.4\n"), out.str());
+}
--- /dev/null
+// Copyright (c) 2014-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/BamFile.h>
+#include <pbbam/EntireFileQuery.h>
+#include <pbbam/PbiRawData.h>
+#include <pbbam/virtual/WhitelistedZmwReadStitcher.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
+// Checks that both records carry every tag listed below and that the two
+// records agree on name, hole number, pass count, sequence, qualities, read
+// group and each tag's value. All checks are non-fatal gtest expectations.
+//
+// b1, b2: the records to compare.
+static
+void Compare(const BamRecord& b1, const BamRecord& b2)
+{
+    // Presence checks: a single list applied to b1 first, then b2, so the two
+    // sets of checks cannot drift apart.
+    for (const BamRecord* record : { &b1, &b2 }) {
+        // identifies which record a failing presence check belongs to
+        SCOPED_TRACE(record == &b1 ? "presence check on b1" : "presence check on b2");
+        EXPECT_TRUE(record->HasDeletionQV());
+        EXPECT_TRUE(record->HasDeletionTag());
+        EXPECT_TRUE(record->HasInsertionQV());
+        EXPECT_TRUE(record->HasMergeQV());
+        EXPECT_TRUE(record->HasSubstitutionQV());
+        EXPECT_TRUE(record->HasSubstitutionTag());
+        EXPECT_TRUE(record->HasLabelQV());
+        EXPECT_TRUE(record->HasAltLabelQV());
+        EXPECT_TRUE(record->HasAltLabelTag());
+        EXPECT_TRUE(record->HasPkmean());
+        EXPECT_TRUE(record->HasPkmid());
+        EXPECT_TRUE(record->HasPulseCall());
+        EXPECT_TRUE(record->HasIPD());
+        EXPECT_TRUE(record->HasPulseWidth());
+        EXPECT_TRUE(record->HasPrePulseFrames());
+        EXPECT_TRUE(record->HasPulseCallWidth());
+        EXPECT_TRUE(record->HasPulseMergeQV());
+    }
+
+    // Field-by-field equality between the two records.
+    EXPECT_EQ(b1.FullName(),        b2.FullName());
+    EXPECT_EQ(b1.HoleNumber(),      b2.HoleNumber());
+    EXPECT_EQ(b1.NumPasses(),       b2.NumPasses());
+    EXPECT_EQ(b1.Sequence(),        b2.Sequence());
+    EXPECT_EQ(b1.Qualities(),       b2.Qualities());
+    EXPECT_EQ(b1.DeletionQV(),      b2.DeletionQV());
+    EXPECT_EQ(b1.DeletionTag(),     b2.DeletionTag());
+    EXPECT_EQ(b1.InsertionQV(),     b2.InsertionQV());
+    EXPECT_EQ(b1.MergeQV(),         b2.MergeQV());
+    EXPECT_EQ(b1.SubstitutionQV(),  b2.SubstitutionQV());
+    EXPECT_EQ(b1.SubstitutionTag(), b2.SubstitutionTag());
+    EXPECT_EQ(b1.LabelQV(),         b2.LabelQV());
+    EXPECT_EQ(b1.AltLabelQV(),      b2.AltLabelQV());
+    EXPECT_EQ(b1.AltLabelTag(),     b2.AltLabelTag());
+    EXPECT_EQ(b1.Pkmean(),          b2.Pkmean());
+    EXPECT_EQ(b1.Pkmid(),           b2.Pkmid());
+    EXPECT_EQ(b1.PulseCall(),       b2.PulseCall());
+    EXPECT_EQ(b1.IPD(),             b2.IPD());
+    EXPECT_EQ(b1.PulseWidth(),      b2.PulseWidth());
+    EXPECT_EQ(b1.PrePulseFrames(),  b2.PrePulseFrames());
+    EXPECT_EQ(b1.PulseCallWidth(),  b2.PulseCallWidth());
+    EXPECT_EQ(b1.ReadGroup(),       b2.ReadGroup());
+    EXPECT_EQ(b1.PulseMergeQV(),    b2.PulseMergeQV());
+}
+
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
+// An empty whitelist selects nothing: the stitcher reports no next record
+// and NextRaw() returns an empty container.
+TEST(WhitelistedZmwReadStitching, EmptyList)
+{
+    const std::vector<int32_t> noZmws{};
+    const std::string subreadsFn = tests::Data_Dir + "/polymerase/internal.subreads.bam";
+    const std::string scrapsFn = tests::Data_Dir + "/polymerase/internal.scraps.bam";
+
+    WhitelistedZmwReadStitcher stitcher(noZmws, subreadsFn, scrapsFn);
+    EXPECT_FALSE(stitcher.HasNext());
+    EXPECT_TRUE(stitcher.NextRaw().empty());
+}
+
+// A whitelist holding one known ZMW yields exactly one virtual record, which
+// must match the second record of internal.polymerase.bam.
+TEST(WhitelistedZmwReadStitching, SingleValue)
+{
+    const std::vector<int32_t> whitelist = { 200000 };
+    WhitelistedZmwReadStitcher stitcher(whitelist,
+                                        tests::Data_Dir + "/polymerase/internal.subreads.bam",
+                                        tests::Data_Dir + "/polymerase/internal.scraps.bam");
+
+    // create virtual record
+    // (fatal check: there is nothing to fetch or compare if the stitcher is empty)
+    ASSERT_TRUE(stitcher.HasNext());
+    auto virtualRecord = stitcher.Next();
+    EXPECT_FALSE(stitcher.HasNext());
+
+    // fetch original polymerase read (2nd record)
+    BamFile polyBam(tests::Data_Dir + "/polymerase/internal.polymerase.bam");
+    EntireFileQuery polyQuery(polyBam);
+    auto begin = polyQuery.begin();
+    auto end = polyQuery.end();
+    // fatal checks: the iterator is advanced and dereferenced right after each one
+    ASSERT_TRUE(begin != end);
+    ++begin;
+    ASSERT_TRUE(begin != end);
+    auto polyRecord = *begin;
+
+    EXPECT_EQ(200000, virtualRecord.HoleNumber());
+
+    tests::Compare(polyRecord, virtualRecord);
+}
+
+// A whitelist naming only a ZMW absent from the input files behaves like an
+// empty whitelist: no next record, and NextRaw() returns an empty container.
+TEST(WhitelistedZmwReadStitching, UnknownZmw)
+{
+    const std::vector<int32_t> unknownZmws { 42 };  // ZMW not in our files
+    const std::string subreadsFn = tests::Data_Dir + "/polymerase/internal.subreads.bam";
+    const std::string scrapsFn = tests::Data_Dir + "/polymerase/internal.scraps.bam";
+
+    WhitelistedZmwReadStitcher stitcher(unknownZmws, subreadsFn, scrapsFn);
+
+    EXPECT_FALSE(stitcher.HasNext());
+    EXPECT_TRUE(stitcher.NextRaw().empty());
+}
+
+// A whitelist holding two known ZMWs yields exactly two virtual records, which
+// must match the first and third records of internal.polymerase.bam.
+TEST(WhitelistedZmwReadStitching, MultiValue)
+{
+    const std::vector<int32_t> whitelist = { 100000, 300000 };
+    WhitelistedZmwReadStitcher stitcher(whitelist,
+                                        tests::Data_Dir + "/polymerase/internal.subreads.bam",
+                                        tests::Data_Dir + "/polymerase/internal.scraps.bam");
+
+    // create virtual records
+    // (fatal checks: the comparisons below need both records)
+    ASSERT_TRUE(stitcher.HasNext());
+    auto virtualRecord1 = stitcher.Next();
+    ASSERT_TRUE(stitcher.HasNext());
+    auto virtualRecord2 = stitcher.Next();
+    EXPECT_FALSE(stitcher.HasNext());
+
+    // fetch original polymerase reads (1st and 3rd records; the 2nd is skipped)
+    BamFile polyBam(tests::Data_Dir + "/polymerase/internal.polymerase.bam");
+    EntireFileQuery polyQuery(polyBam);
+    auto begin = polyQuery.begin();
+    auto end = polyQuery.end();
+
+    // fatal checks: the iterator is dereferenced or advanced right after each one
+    ASSERT_TRUE(begin != end);
+    auto polyRecord1 = *begin++;
+    ASSERT_TRUE(begin != end);
+    ++begin;
+    ASSERT_TRUE(begin != end);
+    auto polyRecord2 = *begin++;
+    EXPECT_TRUE(begin == end);
+
+    EXPECT_EQ(100000, virtualRecord1.HoleNumber());
+    EXPECT_EQ(300000, virtualRecord2.HoleNumber());
+
+    tests::Compare(polyRecord1, virtualRecord1);
+    tests::Compare(polyRecord2, virtualRecord2);
+}
+
+// A whitelist mixing one known ZMW with two unknown ones yields only the
+// record for the known ZMW.
+TEST(WhitelistedZmwReadStitching, MultiValue_MixedKnownAndUnknown)
+{
+    const std::vector<int32_t> whitelist { 42, 200000, 24 };
+    WhitelistedZmwReadStitcher stitcher(whitelist,
+                                        tests::Data_Dir + "/polymerase/internal.subreads.bam",
+                                        tests::Data_Dir + "/polymerase/internal.scraps.bam");
+
+    // everything below should behave exactly as the 'SingleValue' test,
+    // as the unknown ZMWs will have been removed during construction
+
+    // create virtual record
+    // (fatal check: there is nothing to fetch or compare if the stitcher is empty)
+    ASSERT_TRUE(stitcher.HasNext());
+    auto virtualRecord = stitcher.Next();
+    EXPECT_FALSE(stitcher.HasNext());
+
+    // fetch original polymerase read (2nd record)
+    BamFile polyBam(tests::Data_Dir + "/polymerase/internal.polymerase.bam");
+    EntireFileQuery polyQuery(polyBam);
+    auto begin = polyQuery.begin();
+    auto end = polyQuery.end();
+    // fatal checks: the iterator is advanced and dereferenced right after each one
+    ASSERT_TRUE(begin != end);
+    ++begin;
+    ASSERT_TRUE(begin != end);
+    auto polyRecord = *begin;
+
+    EXPECT_EQ(200000, virtualRecord.HoleNumber());
+
+    tests::Compare(polyRecord, virtualRecord);
+}
+
+// Stitching works when the scraps BAM holds no reads: both whitelisted ZMWs
+// still produce a record. The index checks at the end confirm the fixture
+// shape (3 primary reads, 0 scraps reads).
+TEST(WhitelistedZmwReadStitching, EmptyScrapsFileOk)
+{
+    const std::vector<int32_t> whitelist = { 10944689, 10944690 };
+    const std::string primaryBamFn = tests::Data_Dir + "/polymerase/scrapless.subreads.bam";
+    const std::string scrapsBamFn = tests::Data_Dir + "/polymerase/scrapless.scraps.bam";
+
+    // count the stitched records; their contents are not inspected
+    int numStitched = 0;
+    WhitelistedZmwReadStitcher stitcher(whitelist, primaryBamFn, scrapsBamFn);
+    for (; stitcher.HasNext(); ++numStitched) {
+        (void)stitcher.Next();
+    }
+    EXPECT_EQ(2, numStitched);
+
+    // read counts taken from each file's PacBio index
+    const BamFile primaryBam(primaryBamFn);
+    const BamFile scrapsBam(scrapsBamFn);
+    const PbiRawData primaryIdx(primaryBam.PacBioIndexFilename());
+    const PbiRawData scrapsIdx(scrapsBam.PacBioIndexFilename());
+    EXPECT_EQ(3, primaryIdx.NumReads());
+    EXPECT_EQ(0, scrapsIdx.NumReads());
+}
--- /dev/null
+// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/ZmwQuery.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
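+// NOTE: this file defines no active tests. The commented-out test below is
+// named for EntireFileQuery rather than ZmwQuery.
+//TEST(EntireFileQueryTest, CountRecords)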
+//{
+// EXPECT_NO_THROW(
+// {
+// // open input BAM file
+// BamFile bamFile(inputBamFn);
+
+// // count records
+// int count = 0;
+// EntireFileQuery entireFile(bamFile);
+// for (const BamRecord& record : entireFile) {
+// (void)record;
+// ++count;
+// }
+
+// EXPECT_EQ(3307, count);
+// });
+//}
--- /dev/null
+// Copyright (c) 2014-2016, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifdef PBBAM_TESTING
+#define private public
+#endif
+
+#include "TestData.h"
+#include <gtest/gtest.h>
+#include <pbbam/EntireFileQuery.h>
+#include <pbbam/PbiFilter.h>
+#include <pbbam/virtual/VirtualPolymeraseReader.h>
+#include <pbbam/virtual/VirtualPolymeraseCompositeReader.h>
+#include <pbbam/virtual/ZmwReadStitcher.h>
+#include <string>
+using namespace PacBio;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace PacBio {
+namespace BAM {
+namespace tests {
+
+// Checks that both records carry every tag listed below and that the two
+// records agree on name, hole number, pass count, sequence, qualities, read
+// group and each tag's value. All checks are non-fatal gtest expectations.
+//
+// b1, b2: the records to compare.
+static
+void Compare(const BamRecord& b1, const BamRecord& b2)
+{
+    // Presence checks: a single list applied to b1 first, then b2, so the two
+    // sets of checks cannot drift apart.
+    for (const BamRecord* record : { &b1, &b2 }) {
+        // identifies which record a failing presence check belongs to
+        SCOPED_TRACE(record == &b1 ? "presence check on b1" : "presence check on b2");
+        EXPECT_TRUE(record->HasDeletionQV());
+        EXPECT_TRUE(record->HasDeletionTag());
+        EXPECT_TRUE(record->HasInsertionQV());
+        EXPECT_TRUE(record->HasMergeQV());
+        EXPECT_TRUE(record->HasSubstitutionQV());
+        EXPECT_TRUE(record->HasSubstitutionTag());
+        EXPECT_TRUE(record->HasLabelQV());
+        EXPECT_TRUE(record->HasAltLabelQV());
+        EXPECT_TRUE(record->HasAltLabelTag());
+        EXPECT_TRUE(record->HasPkmean());
+        EXPECT_TRUE(record->HasPkmid());
+        EXPECT_TRUE(record->HasPulseCall());
+        EXPECT_TRUE(record->HasIPD());
+        EXPECT_TRUE(record->HasPulseWidth());
+        EXPECT_TRUE(record->HasPrePulseFrames());
+        EXPECT_TRUE(record->HasPulseCallWidth());
+        EXPECT_TRUE(record->HasPulseMergeQV());
+    }
+
+    // Field-by-field equality between the two records.
+    EXPECT_EQ(b1.FullName(),        b2.FullName());
+    EXPECT_EQ(b1.HoleNumber(),      b2.HoleNumber());
+    EXPECT_EQ(b1.NumPasses(),       b2.NumPasses());
+    EXPECT_EQ(b1.Sequence(),        b2.Sequence());
+    EXPECT_EQ(b1.Qualities(),       b2.Qualities());
+    EXPECT_EQ(b1.DeletionQV(),      b2.DeletionQV());
+    EXPECT_EQ(b1.DeletionTag(),     b2.DeletionTag());
+    EXPECT_EQ(b1.InsertionQV(),     b2.InsertionQV());
+    EXPECT_EQ(b1.MergeQV(),         b2.MergeQV());
+    EXPECT_EQ(b1.SubstitutionQV(),  b2.SubstitutionQV());
+    EXPECT_EQ(b1.SubstitutionTag(), b2.SubstitutionTag());
+    EXPECT_EQ(b1.LabelQV(),         b2.LabelQV());
+    EXPECT_EQ(b1.AltLabelQV(),      b2.AltLabelQV());
+    EXPECT_EQ(b1.AltLabelTag(),     b2.AltLabelTag());
+    EXPECT_EQ(b1.Pkmean(),          b2.Pkmean());
+    EXPECT_EQ(b1.Pkmid(),           b2.Pkmid());
+    EXPECT_EQ(b1.PulseCall(),       b2.PulseCall());
+    EXPECT_EQ(b1.IPD(),             b2.IPD());
+    EXPECT_EQ(b1.PulseWidth(),      b2.PulseWidth());
+    EXPECT_EQ(b1.PrePulseFrames(),  b2.PrePulseFrames());
+    EXPECT_EQ(b1.PulseCallWidth(),  b2.PulseCallWidth());
+    EXPECT_EQ(b1.ReadGroup(),       b2.ReadGroup());
+    EXPECT_EQ(b1.PulseMergeQV(),    b2.PulseMergeQV());
+}
+
+// Returns how many records a ZmwReadStitcher built from the given primary and
+// scraps BAM files produces before HasNext() turns false.
+static
+size_t NumVirtualRecords(const string& primaryBamFn,
+                         const string& scrapsBamFn)
+{
+    size_t numRecords = 0;
+    for (ZmwReadStitcher stitcher(primaryBamFn, scrapsBamFn); stitcher.HasNext(); ++numRecords) {
+        // only the number of records matters here; the record itself is discarded
+        (void)stitcher.Next();
+    }
+    return numRecords;
+}
+
+} // namespace tests
+} // namespace BAM
+} // namespace PacBio
+
+// Without a filter, stitching the internal subreads/scraps pair yields three
+// virtual records.
+TEST(ZmwReadStitching, FromBams_NoFilter)
+{
+    const string primaryBamFn = tests::Data_Dir + "/polymerase/internal.subreads.bam";
+    const string scrapsBamFn = tests::Data_Dir + "/polymerase/internal.scraps.bam";
+
+    // unsigned literal: the helper returns size_t
+    EXPECT_EQ(3u, tests::NumVirtualRecords(primaryBamFn, scrapsBamFn));
+}
+
+// With a ZMW filter for hole number 100000, stitching the internal BAM pair
+// yields a single record, and that record has the filtered hole number.
+TEST(ZmwReadStitching, FromBams_Filtered)
+{
+    PbiFilter filter { PbiZmwFilter{100000} }; // setup to match DataSet w/ filter
+    ZmwReadStitcher stitcher(tests::Data_Dir + "/polymerase/internal.subreads.bam",
+                             tests::Data_Dir + "/polymerase/internal.scraps.bam",
+                             filter);
+    size_t count = 0;
+    while (stitcher.HasNext()) {
+        const auto record = stitcher.Next();
+        EXPECT_EQ(100000, record.HoleNumber());
+        ++count;
+    }
+    // unsigned literal: count is size_t
+    EXPECT_EQ(1u, count);
+}
+
+// Stitching from a DataSet XML must yield as many records as stitching each of
+// the listed BAM pairs separately and adding up the counts.
+TEST(ZmwReadStitching, FromDataSet_NoFilter)
+{
+    // dataset contains these resources (subreads/scraps + hqregion/scraps BAMs)
+    const string subreadsFn = tests::Data_Dir + "/polymerase/production.subreads.bam";
+    const string subreadScrapsFn = tests::Data_Dir + "/polymerase/production.scraps.bam";
+    const string hqRegionFn = tests::Data_Dir + "/polymerase/production_hq.hqregion.bam";
+    const string hqScrapsFn = tests::Data_Dir + "/polymerase/production_hq.scraps.bam";
+
+    // expected total: per-pair record counts, summed
+    const size_t fromSubreads = tests::NumVirtualRecords(subreadsFn, subreadScrapsFn);
+    const size_t fromHqRegion = tests::NumVirtualRecords(hqRegionFn, hqScrapsFn);
+    const size_t numExpected = fromSubreads + fromHqRegion;
+
+    const string xmlFn = tests::Data_Dir +
+                         "/polymerase/multiple_resources.subread.dataset.xml";
+
+    // observed total: records produced by a stitcher built on the dataset
+    DataSet dataset{ xmlFn };
+    ZmwReadStitcher stitcher{ dataset };
+    size_t numObserved = 0;
+    for (; stitcher.HasNext(); ++numObserved) {
+        (void)stitcher.Next();
+    }
+    EXPECT_EQ(numExpected, numObserved);
+}
+
+// Stitching from a DataSet XML that carries a ZMW filter: of the five records
+// available across the three BAM pairs, only one is produced.
+TEST(ZmwReadStitching, FromDataSet_Filtered)
+{
+    // dataset contains these resources
+    // (production subreads/scraps, internal subreads/scraps, production hqregion/scraps BAMs)
+    const string primaryFn1 = tests::Data_Dir + "/polymerase/production.subreads.bam";
+    const string scrapsFn1  = tests::Data_Dir + "/polymerase/production.scraps.bam";
+    const string primaryFn2 = tests::Data_Dir + "/polymerase/internal.subreads.bam";
+    const string scrapsFn2  = tests::Data_Dir + "/polymerase/internal.scraps.bam";
+    const string primaryFn3 = tests::Data_Dir + "/polymerase/production_hq.hqregion.bam";
+    const string scrapsFn3  = tests::Data_Dir + "/polymerase/production_hq.scraps.bam";
+    const size_t totalRecords =
+        tests::NumVirtualRecords(primaryFn1, scrapsFn1) +
+        tests::NumVirtualRecords(primaryFn2, scrapsFn2) +
+        tests::NumVirtualRecords(primaryFn3, scrapsFn3);
+    // unsigned literal: totalRecords is size_t
+    EXPECT_EQ(5u, totalRecords);
+
+    // our filter will remove the 2 "production" BAM pairs
+    // using a ZMW filter that only the "internal" pair should pass
+    const string datasetFn = tests::Data_Dir +
+                             "/polymerase/filtered_resources.subread.dataset.xml";
+
+    DataSet ds{ datasetFn };
+    ZmwReadStitcher stitcher{ ds };
+    size_t numObservedRecords = 0;
+    while (stitcher.HasNext()) {
+        const auto record = stitcher.Next();
+        (void)record;
+        ++numObservedRecords;
+    }
+    // unsigned literal: numObservedRecords is size_t
+    EXPECT_EQ(1u, numObservedRecords);
+}
+
+// A stitcher built on a default-constructed DataSet has nothing to produce.
+TEST(ZmwReadStitching, FromDataSet_EmptyDataSet)
+{
+    const DataSet emptyDataSet{};
+    ZmwReadStitcher stitcher{ emptyDataSet };
+    EXPECT_FALSE(stitcher.HasNext());
+}
+
+// Stitching works when the scraps BAM holds no reads: each of the three
+// primary reads still produces a record.
+TEST(ZmwReadStitching, EmptyScrapsFile)
+{
+    const std::string primaryBamFn = tests::Data_Dir + "/polymerase/scrapless.subreads.bam";
+    const std::string scrapsBamFn = tests::Data_Dir + "/polymerase/scrapless.scraps.bam";
+
+    // fixture shape, read from each file's PacBio index: 3 primary reads, 0 scraps reads
+    const BamFile primaryBam(primaryBamFn);
+    const BamFile scrapsBam(scrapsBamFn);
+    const PbiRawData primaryIdx(primaryBam.PacBioIndexFilename());
+    const PbiRawData scrapsIdx(scrapsBam.PacBioIndexFilename());
+    EXPECT_EQ(3, primaryIdx.NumReads());
+    EXPECT_EQ(0, scrapsIdx.NumReads());
+
+    // count the stitched records; their contents are not inspected
+    int numStitched = 0;
+    ZmwReadStitcher stitcher(primaryBamFn, scrapsBamFn);
+    for (; stitcher.HasNext(); ++numStitched) {
+        (void)stitcher.Next();
+    }
+    EXPECT_EQ(3, numStitched);
+}
+
+// Checks the region annotations of the first stitched record from the internal
+// BAM pair: the map accessor and the per-type table accessor must agree, and
+// the region boundaries must match the expected values listed below.
+TEST(ZmwReadStitching, VirtualRegions)
+{
+    // Create virtual polymerase read
+    ZmwReadStitcher stitcher(tests::Data_Dir + "/polymerase/internal.subreads.bam",
+                             tests::Data_Dir + "/polymerase/internal.scraps.bam");
+    // fatal check: nothing below is meaningful without a record
+    ASSERT_TRUE(stitcher.HasNext());
+    auto virtualRecord = stitcher.Next();
+
+    auto regionMap = virtualRecord.VirtualRegionsMap();
+    auto adapter = virtualRecord.VirtualRegionsTable(VirtualRegionType::ADAPTER);
+
+    // Compare different accessors to same source
+    EXPECT_EQ(regionMap[VirtualRegionType::ADAPTER], adapter);
+
+    // Compare to truth
+    // (fatal size check guards the indexing below)
+    ASSERT_GE(adapter.size(), 7u);
+    EXPECT_EQ(3047, adapter[0].beginPos);
+    EXPECT_EQ(3095, adapter[0].endPos);
+    EXPECT_EQ(3650, adapter[1].beginPos);
+    EXPECT_EQ(3700, adapter[1].endPos);
+    EXPECT_EQ(4289, adapter[2].beginPos);
+    EXPECT_EQ(4335, adapter[2].endPos);
+    EXPECT_EQ(4888, adapter[3].beginPos);
+    EXPECT_EQ(4939, adapter[3].endPos);
+    EXPECT_EQ(5498, adapter[4].beginPos);
+    EXPECT_EQ(5546, adapter[4].endPos);
+    EXPECT_EQ(6116, adapter[5].beginPos);
+    EXPECT_EQ(6173, adapter[5].endPos);
+    EXPECT_EQ(6740, adapter[6].beginPos);
+    EXPECT_EQ(6790, adapter[6].endPos);
+
+    auto barcode = virtualRecord.VirtualRegionsTable(VirtualRegionType::BARCODE);
+    EXPECT_EQ(regionMap[VirtualRegionType::BARCODE], barcode);
+    ASSERT_GE(barcode.size(), 14u);
+    EXPECT_EQ(3025, barcode[0].beginPos);
+    EXPECT_EQ(3047, barcode[0].endPos);
+    EXPECT_EQ(3095, barcode[1].beginPos);
+    EXPECT_EQ(3116, barcode[1].endPos);
+    EXPECT_EQ(3628, barcode[2].beginPos);
+    EXPECT_EQ(3650, barcode[2].endPos);
+    EXPECT_EQ(3700, barcode[3].beginPos);
+    EXPECT_EQ(3722, barcode[3].endPos);
+    EXPECT_EQ(4267, barcode[4].beginPos);
+    EXPECT_EQ(4289, barcode[4].endPos);
+    EXPECT_EQ(4335, barcode[5].beginPos);
+    EXPECT_EQ(4356, barcode[5].endPos);
+    EXPECT_EQ(4864, barcode[6].beginPos);
+    EXPECT_EQ(4888, barcode[6].endPos);
+    EXPECT_EQ(4939, barcode[7].beginPos);
+    EXPECT_EQ(4960, barcode[7].endPos);
+    EXPECT_EQ(5477, barcode[8].beginPos);
+    EXPECT_EQ(5498, barcode[8].endPos);
+    EXPECT_EQ(5546, barcode[9].beginPos);
+    EXPECT_EQ(5571, barcode[9].endPos);
+    EXPECT_EQ(6087, barcode[10].beginPos);
+    EXPECT_EQ(6116, barcode[10].endPos);
+    EXPECT_EQ(6173, barcode[11].beginPos);
+    EXPECT_EQ(6199, barcode[11].endPos);
+    EXPECT_EQ(6719, barcode[12].beginPos);
+    EXPECT_EQ(6740, barcode[12].endPos);
+    EXPECT_EQ(6790, barcode[13].beginPos);
+    EXPECT_EQ(6812, barcode[13].endPos);
+
+    auto lqregion = virtualRecord.VirtualRegionsTable(VirtualRegionType::LQREGION);
+    EXPECT_EQ(regionMap[VirtualRegionType::LQREGION], lqregion);
+    ASSERT_GE(lqregion.size(), 2u);
+    EXPECT_EQ(0, lqregion[0].beginPos);
+    EXPECT_EQ(2659, lqregion[0].endPos);
+    EXPECT_EQ(7034, lqregion[1].beginPos);
+    EXPECT_EQ(7035, lqregion[1].endPos);
+
+    auto hqregion = virtualRecord.VirtualRegionsTable(VirtualRegionType::HQREGION);
+    EXPECT_EQ(regionMap[VirtualRegionType::HQREGION], hqregion);
+    ASSERT_GE(hqregion.size(), 1u);
+    EXPECT_EQ(2659, hqregion[0].beginPos);
+    EXPECT_EQ(7034, hqregion[0].endPos);
+}
+
+// The first record stitched from the internal subreads/scraps pair must match
+// the first record of internal.polymerase.bam.
+TEST(ZmwReadStitching, InternalSubreadsToOriginal)
+{
+    // Create virtual polymerase read
+    ZmwReadStitcher stitcher(tests::Data_Dir + "/polymerase/internal.subreads.bam",
+                             tests::Data_Dir + "/polymerase/internal.scraps.bam");
+    // fatal check: there is nothing to compare if the stitcher is empty
+    ASSERT_TRUE(stitcher.HasNext());
+    auto virtualRecord = stitcher.Next();
+
+    // Read original polymerase read
+    BamFile polyBam(tests::Data_Dir + "/polymerase/internal.polymerase.bam");
+    EntireFileQuery polyQuery(polyBam);
+    auto begin = polyQuery.begin();
+    auto end = polyQuery.end();
+    // fatal check: the iterator is dereferenced on the next line
+    ASSERT_TRUE(begin != end);
+    auto polyRecord = *begin;
+
+    // check
+    tests::Compare(polyRecord, virtualRecord);
+}
+
+// The first record stitched from the internal hqregions/lqregions pair must
+// match the first record of internal.polymerase.bam.
+TEST(ZmwReadStitching, InternalHQToOriginal)
+{
+    // Create virtual polymerase read
+    ZmwReadStitcher stitcher(tests::Data_Dir + "/polymerase/internal.hqregions.bam",
+                             tests::Data_Dir + "/polymerase/internal.lqregions.bam");
+    // fatal check: there is nothing to compare if the stitcher is empty
+    ASSERT_TRUE(stitcher.HasNext());
+    auto virtualRecord = stitcher.Next();
+
+    // Read original polymerase read
+    BamFile polyBam(tests::Data_Dir + "/polymerase/internal.polymerase.bam");
+    EntireFileQuery polyQuery(polyBam);
+    auto begin = polyQuery.begin();
+    auto end = polyQuery.end();
+    // fatal check: the iterator is dereferenced on the next line
+    ASSERT_TRUE(begin != end);
+    auto polyRecord = *begin;
+
+    // check
+    tests::Compare(polyRecord, virtualRecord);
+}
+
+// The single record stitched from the production subreads/scraps pair must
+// agree, field by field, with the first record of production.polymerase.bam.
+TEST(ZmwReadStitching, ProductionSubreadsToOriginal)
+{
+    // Create virtual polymerase read
+    ZmwReadStitcher stitcher(tests::Data_Dir + "/polymerase/production.subreads.bam",
+                             tests::Data_Dir + "/polymerase/production.scraps.bam");
+
+    // fatal check: there is nothing to compare if the stitcher is empty
+    ASSERT_TRUE(stitcher.HasNext());
+    auto virtualRecord = stitcher.Next();
+    EXPECT_FALSE(stitcher.HasNext());
+
+    // Read original polymerase read
+    BamFile polyBam(tests::Data_Dir + "/polymerase/production.polymerase.bam");
+    EntireFileQuery polyQuery(polyBam);
+
+    auto begin = polyQuery.begin();
+    auto end = polyQuery.end();
+    // fatal check: the iterator is dereferenced on the next line
+    ASSERT_TRUE(begin != end);
+    auto polyRecord = *begin;
+
+    EXPECT_EQ(polyRecord.FullName(),        virtualRecord.FullName());
+    EXPECT_EQ(polyRecord.HoleNumber(),      virtualRecord.HoleNumber());
+    EXPECT_FLOAT_EQ(polyRecord.ReadAccuracy(), virtualRecord.ReadAccuracy());
+    EXPECT_EQ(polyRecord.NumPasses(),       virtualRecord.NumPasses());
+    EXPECT_EQ(polyRecord.Sequence(),        virtualRecord.Sequence());
+    EXPECT_EQ(polyRecord.Qualities(),       virtualRecord.Qualities());
+    EXPECT_EQ(polyRecord.DeletionQV(),      virtualRecord.DeletionQV());
+    EXPECT_EQ(polyRecord.DeletionTag(),     virtualRecord.DeletionTag());
+    EXPECT_EQ(polyRecord.InsertionQV(),     virtualRecord.InsertionQV());
+    EXPECT_EQ(polyRecord.MergeQV(),         virtualRecord.MergeQV());
+    EXPECT_EQ(polyRecord.SubstitutionQV(),  virtualRecord.SubstitutionQV());
+    EXPECT_EQ(polyRecord.SubstitutionTag(), virtualRecord.SubstitutionTag());
+    // the original's IPD() is compared against the virtual record's IPDV1Frames()
+    EXPECT_EQ(polyRecord.IPD(),             virtualRecord.IPDV1Frames());
+    EXPECT_EQ(polyRecord.ReadGroup(),       virtualRecord.ReadGroup());
+}
+
+// The single record stitched from the production hqregion/scraps pair must
+// agree, field by field, with the first record of production.polymerase.bam,
+// and both records must carry the same set of tags.
+TEST(ZmwReadStitching, ProductionHQToOriginal)
+{
+    // Create virtual polymerase read
+    ZmwReadStitcher stitcher(tests::Data_Dir + "/polymerase/production_hq.hqregion.bam",
+                             tests::Data_Dir + "/polymerase/production_hq.scraps.bam");
+    // fatal check: there is nothing to compare if the stitcher is empty
+    ASSERT_TRUE(stitcher.HasNext());
+    auto virtualRecord = stitcher.Next();
+    EXPECT_FALSE(stitcher.HasNext());
+
+    // Read original polymerase read
+    BamFile polyBam(tests::Data_Dir + "/polymerase/production.polymerase.bam");
+    EntireFileQuery polyQuery(polyBam);
+
+    auto begin = polyQuery.begin();
+    auto end = polyQuery.end();
+    // fatal check: the iterator is dereferenced on the next line
+    ASSERT_TRUE(begin != end);
+    auto polyRecord = *begin;
+
+    // field-by-field equality
+    EXPECT_EQ(polyRecord.FullName(),        virtualRecord.FullName());
+    EXPECT_EQ(polyRecord.HoleNumber(),      virtualRecord.HoleNumber());
+    EXPECT_FLOAT_EQ(polyRecord.ReadAccuracy(), virtualRecord.ReadAccuracy());
+    EXPECT_EQ(polyRecord.NumPasses(),       virtualRecord.NumPasses());
+    EXPECT_EQ(polyRecord.Sequence(),        virtualRecord.Sequence());
+    EXPECT_EQ(polyRecord.Qualities(),       virtualRecord.Qualities());
+    EXPECT_EQ(polyRecord.DeletionQV(),      virtualRecord.DeletionQV());
+    EXPECT_EQ(polyRecord.DeletionTag(),     virtualRecord.DeletionTag());
+    EXPECT_EQ(polyRecord.InsertionQV(),     virtualRecord.InsertionQV());
+    EXPECT_EQ(polyRecord.MergeQV(),         virtualRecord.MergeQV());
+    EXPECT_EQ(polyRecord.SubstitutionQV(),  virtualRecord.SubstitutionQV());
+    EXPECT_EQ(polyRecord.SubstitutionTag(), virtualRecord.SubstitutionTag());
+    // the original's IPD() is compared against the virtual record's IPDV1Frames()
+    EXPECT_EQ(polyRecord.IPD(),             virtualRecord.IPDV1Frames());
+    EXPECT_EQ(polyRecord.ReadGroup(),       virtualRecord.ReadGroup());
+
+    // tags expected on the original record ...
+    EXPECT_TRUE(polyRecord.HasDeletionQV());
+    EXPECT_TRUE(polyRecord.HasDeletionTag());
+    EXPECT_TRUE(polyRecord.HasInsertionQV());
+    EXPECT_TRUE(polyRecord.HasMergeQV());
+    EXPECT_TRUE(polyRecord.HasSubstitutionQV());
+    EXPECT_TRUE(polyRecord.HasSubstitutionTag());
+    EXPECT_TRUE(polyRecord.HasIPD());
+    // ... and tags expected to be absent from it
+    EXPECT_FALSE(polyRecord.HasLabelQV());
+    EXPECT_FALSE(polyRecord.HasAltLabelQV());
+    EXPECT_FALSE(polyRecord.HasAltLabelTag());
+    EXPECT_FALSE(polyRecord.HasPkmean());
+    EXPECT_FALSE(polyRecord.HasPkmid());
+    EXPECT_FALSE(polyRecord.HasPulseCall());
+    EXPECT_FALSE(polyRecord.HasPulseWidth());
+    EXPECT_FALSE(polyRecord.HasPrePulseFrames());
+    EXPECT_FALSE(polyRecord.HasPulseCallWidth());
+
+    // the stitched record must show the same present/absent pattern
+    EXPECT_TRUE(virtualRecord.HasDeletionQV());
+    EXPECT_TRUE(virtualRecord.HasDeletionTag());
+    EXPECT_TRUE(virtualRecord.HasInsertionQV());
+    EXPECT_TRUE(virtualRecord.HasMergeQV());
+    EXPECT_TRUE(virtualRecord.HasSubstitutionQV());
+    EXPECT_TRUE(virtualRecord.HasSubstitutionTag());
+    EXPECT_TRUE(virtualRecord.HasIPD());
+    EXPECT_FALSE(virtualRecord.HasLabelQV());
+    EXPECT_FALSE(virtualRecord.HasAltLabelQV());
+    EXPECT_FALSE(virtualRecord.HasAltLabelTag());
+    EXPECT_FALSE(virtualRecord.HasPkmean());
+    EXPECT_FALSE(virtualRecord.HasPkmid());
+    EXPECT_FALSE(virtualRecord.HasPulseCall());
+    EXPECT_FALSE(virtualRecord.HasPulseWidth());
+    EXPECT_FALSE(virtualRecord.HasPrePulseFrames());
+    EXPECT_FALSE(virtualRecord.HasPulseCallWidth());
+}
+
+TEST(ZmwReadStitching, VirtualRecord_VirtualRegionsTable)
+{
+    // stitch one record from the production subreads/scraps pair
+    const auto subreadsFn = tests::Data_Dir + "/polymerase/production.subreads.bam";
+    const auto scrapsFn   = tests::Data_Dir + "/polymerase/production.scraps.bam";
+    ZmwReadStitcher stitcher(subreadsFn, scrapsFn);
+    EXPECT_TRUE(stitcher.HasNext());
+    const auto stitched = stitcher.Next();
+
+    // region types that this data set is expected to contain
+    EXPECT_FALSE(stitched.VirtualRegionsTable(VirtualRegionType::SUBREAD).empty());
+    EXPECT_FALSE(stitched.VirtualRegionsTable(VirtualRegionType::ADAPTER).empty());
+    EXPECT_FALSE(stitched.VirtualRegionsTable(VirtualRegionType::HQREGION).empty());
+    EXPECT_FALSE(stitched.VirtualRegionsTable(VirtualRegionType::LQREGION).empty());
+    EXPECT_FALSE(stitched.VirtualRegionsTable(VirtualRegionType::BARCODE).empty());
+
+    // this type not present in this data
+    EXPECT_TRUE(stitched.VirtualRegionsTable(VirtualRegionType::FILTERED).empty());
+}
+
+TEST(ZmwReadStitching, LegacyTypedefsOk)
+{
+    // the legacy reader name must still iterate a subreads/scraps pair
+    {
+        const auto subreadsFn = tests::Data_Dir + "/polymerase/internal.subreads.bam";
+        const auto scrapsFn   = tests::Data_Dir + "/polymerase/internal.scraps.bam";
+        VirtualPolymeraseReader reader(subreadsFn, scrapsFn);
+
+        size_t count = 0;
+        for (; reader.HasNext(); ++count) {
+            const auto record = reader.Next();
+            (void)record;
+        }
+        EXPECT_EQ(3, count);
+    }
+
+    // the legacy composite reader name must still be constructible from an empty dataset
+    {
+        VirtualPolymeraseCompositeReader reader{ DataSet{} };
+        EXPECT_FALSE(reader.HasNext());
+    }
+}
+
--- /dev/null
+
+
+if(DEFINED PacBioBAM_build_pbindex)
+ # Backward-compatibility shim: entered only when the old option was explicitly defined.
+ # Deprecating the "PacBioBAM_build_pbindex" command line option in favor of more
+ # general "PacBioBAM_build_tools", as we're starting to add new utilities.
+ #
+ # That said, I don't want to break current auto tests/builds, so I'm providing a
+ # warning message so devs are aware.
+ #
+ # construct warning message (its middle sentence reflects whether the old option was ON or OFF)
+ set(pbindex_warning "\nDeprecated:\n-DPacBioBAM_build_pbindex\n")
+ if (PacBioBAM_build_pbindex)
+ set(pbindex_warning "${pbindex_warning} Building as requested,")
+ else()
+ set(pbindex_warning "${pbindex_warning} Skipping as requested,")
+ endif()
+ set(pbindex_warning "${pbindex_warning} but support for this option will be removed at some point in the future.\n")
+ message(AUTHOR_WARNING "${pbindex_warning} ** Use -DPacBioBAM_build_tools instead. **\n")
+
+ # force PacBioBAM_build_tools option: the old option's value overwrites any cached value of the new one
+ set(PacBioBAM_build_tools
+ ${PacBioBAM_build_pbindex} CACHE BOOL
+ "Build PacBioBAM with add'l utilities (e.g. pbindex, pbindexdump)." FORCE)
+endif()
+
+if (PacBioBAM_build_tools)
+ # Everything in this section is configured only when the tools build is enabled.
+ # tools directory (shared tool sources, and the cram test templates used by the tool subdirectories)
+ set(ToolsCommonDir ${PacBioBAM_ToolsDir}/common)
+ set(PacBioBAM_CramTestsDir ${PacBioBAM_TestsDir}/src/cram)
+
+ # quash warning with OptionParser (the flag is added only if the compiler accepts it)
+ include(CheckCXXCompilerFlag)
+ check_cxx_compiler_flag("-Wno-unused-private-field" HAS_NO_UNUSED_PRIVATE_FIELD)
+ if(HAS_NO_UNUSED_PRIVATE_FIELD)
+ set(PacBioBAM_CXX_FLAGS "${PacBioBAM_CXX_FLAGS} -Wno-unused-private-field")
+ endif()
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PacBioBAM_CXX_FLAGS}")
+
+ # tools (one subdirectory per utility; each provides its own CMakeLists.txt)
+ add_subdirectory(bam2sam)
+ add_subdirectory(pbindex)
+ add_subdirectory(pbindexdump)
+ add_subdirectory(pbmerge)
+
+endif()
--- /dev/null
+
+set(Bam2SamSrcDir ${PacBioBAM_ToolsDir}/bam2sam/src)
+
+# create version header: bam2sam reports the library version; @ONLY restricts substitution to @VAR@ placeholders
+set(Bam2Sam_VERSION ${PacBioBAM_VERSION})
+configure_file(
+ ${Bam2SamSrcDir}/Bam2SamVersion.h.in ${GeneratedDir}/Bam2SamVersion.h @ONLY
+)
+
+# list source files (the option parser is shared code from the tools' common directory)
+set(BAM2SAM_SOURCES
+ ${ToolsCommonDir}/OptionParser.cpp
+ ${Bam2SamSrcDir}/main.cpp
+ ${Bam2SamSrcDir}/Bam2Sam.cpp
+)
+
+# build bam2sam executable (create_pbbam_tool is presumably defined by the PbbamTool module, not shown here)
+include(PbbamTool)
+create_pbbam_tool(
+ TARGET bam2sam
+ SOURCES ${BAM2SAM_SOURCES}
+)
+
+# cram tests (registered only when the test suite is being built)
+if (PacBioBAM_build_tests)
+ # generate the cram script from its template
+ configure_file(
+ ${PacBioBAM_CramTestsDir}/bam2sam.t.in
+ ${GeneratedDir}/bam2sam.t
+ )
+ # run cram.py from the tests' scripts directory on the generated script
+ add_test(
+ NAME bam2sam_CramTests
+ WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
+ COMMAND "python" cram.py
+ ${GeneratedDir}/bam2sam.t
+ )
+
+endif()
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "Bam2Sam.h"
+#include <htslib/sam.h>
+#include <stdexcept>
+#include <memory>
+#include <cassert>
+using namespace bam2sam;
+using namespace std;
+
+namespace bam2sam {
+
+/// Deleter for std::unique_ptr<samFile>: closes the handle with sam_close().
+struct HtslibFileDeleter
+{
+    void operator()(samFile* file)
+    {
+        // a null handle was never opened, so there is nothing to close
+        if (file != nullptr)
+            sam_close(file);
+    }
+};
+
+/// Deleter for std::unique_ptr<bam_hdr_t>: releases the header with bam_hdr_destroy().
+struct HtslibHeaderDeleter
+{
+    void operator()(bam_hdr_t* hdr)
+    {
+        // nothing to release for a null header
+        if (hdr != nullptr)
+            bam_hdr_destroy(hdr);
+    }
+};
+
+/// Deleter for std::unique_ptr<bam1_t>: releases the record with bam_destroy1().
+struct HtslibRecordDeleter
+{
+    void operator()(bam1_t* b)
+    {
+        // nothing to release for a null record
+        if (b != nullptr)
+            bam_destroy1(b);
+    }
+};
+
+} // namespace bam2sam
+
+/// Converts the BAM input named in \p settings to SAM text on stdout.
+///
+/// The header is written first unless settings.noHeader_ is set; if
+/// settings.printHeaderOnly_ is set, the function returns right after the
+/// header. Records are then copied one at a time from input to output.
+///
+/// \param[in] settings  input filename ("-" selects stdin) and header options
+///
+/// \throws std::runtime_error if the input or output cannot be opened, the
+///         header cannot be read or written, or a record cannot be read or
+///         written
+///
+void PbBam2Sam::Run(const Settings &settings)
+{
+    int htslibResult = 0;
+
+    // open files
+
+    unique_ptr<samFile, HtslibFileDeleter> inFileWrapper(sam_open(settings.inputFilename_.c_str(), "rb"));
+    samFile* in = inFileWrapper.get();
+    if (!in || !in->fp.bgzf) {
+        // keep the historical message for stdin, but name the file otherwise
+        if (settings.inputFilename_ == "-")
+            throw std::runtime_error("could not read from stdin");
+        throw std::runtime_error("could not read from input file: " + settings.inputFilename_);
+    }
+
+    unique_ptr<samFile, HtslibFileDeleter> outFileWrapper(sam_open("-", "w"));
+    samFile* out = outFileWrapper.get();
+    if (!out)
+        throw std::runtime_error("could not write to stdout");
+
+    // fetch & write header
+
+    unique_ptr<bam_hdr_t, HtslibHeaderDeleter> headerWrapper(bam_hdr_read(in->fp.bgzf));
+    bam_hdr_t* hdr = headerWrapper.get();
+    if (!hdr)
+        throw std::runtime_error("could not read header");
+
+    if (!settings.noHeader_) {
+        htslibResult = sam_hdr_write(out, hdr);
+        if (htslibResult != 0)
+            throw std::runtime_error("could not write header");
+        if (settings.printHeaderOnly_)
+            return;
+    }
+
+    // fetch & write records
+
+    unique_ptr<bam1_t, HtslibRecordDeleter> recordWrapper(bam_init1());
+    bam1_t* b = recordWrapper.get();
+    if (!b)
+        throw std::runtime_error("could not allocate record");
+
+    while ((htslibResult = sam_read1(in, hdr, b)) >= 0) {
+        if (sam_write1(out, hdr, b) < 0)
+            throw std::runtime_error("error writing record to stdout");
+    }
+
+    // sam_read1() reports normal end-of-stream as -1; anything lower is a
+    // read failure (e.g. truncated or corrupt input) and must not pass as success
+    if (htslibResult < -1)
+        throw std::runtime_error("error reading record from input");
+}
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef BAM2SAM_H
+#define BAM2SAM_H
+
+#include "Settings.h"
+
+namespace bam2sam {
+
+class PbBam2Sam // entry point of the bam2sam tool; exposes a single static operation
+{
+public:
+ static void Run(const Settings& settings); // runs the conversion described by 'settings' (defined in Bam2Sam.cpp)
+};
+
+} // namespace bam2sam
+
+#endif // BAM2SAM_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef BAM2SAMVERSION_H
+#define BAM2SAMVERSION_H
+
+#include <string>
+
+namespace bam2sam {
+
+/// Version string of the bam2sam tool; the @...@ placeholder is replaced when
+/// this template is run through CMake's configure_file().
+const std::string Version("@Bam2Sam_VERSION@");
+
+} // namespace bam2sam
+
+#endif // BAM2SAMVERSION_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef SETTINGS_H
+#define SETTINGS_H
+
+#include <string>
+#include <vector>
+
+namespace bam2sam {
+
+/// Options for a single bam2sam run, plus any problems found while gathering them.
+class Settings
+{
+public:
+    /// Both header flags start out false.
+    Settings(void) : noHeader_(false), printHeaderOnly_(false) { }
+
+    std::string inputFilename_;         ///< input BAM filename
+    bool noHeader_;                     ///< true: do not write the header
+    bool printHeaderOnly_;              ///< true: stop after writing the header
+    std::vector<std::string> errors_;   ///< error messages; empty means the settings are usable
+};
+
+} // namespace bam2sam
+
+#endif // SETTINGS_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "../common/OptionParser.h"
+#include "Bam2Sam.h"
+#include "Bam2SamVersion.h"
+#include <string>
+#include <vector>
+#include <cassert>
+#include <cstdlib>
+
+/// Translates the parsed command line into a bam2sam::Settings object.
+///
+/// Problems (too many inputs, conflicting header flags) are not thrown; they
+/// are appended to the returned object's errors_ list for the caller to report.
+static
+bam2sam::Settings fromCommandLine(optparse::OptionParser& parser,
+                                  int argc, char* argv[])
+{
+    bam2sam::Settings result;
+
+    const optparse::Values options = parser.parse_args(argc, argv);
+
+    // input: zero positionals means stdin, one is the filename, more is an error
+    const std::vector<std::string> positionals = parser.args();
+    switch (positionals.size()) {
+        case 0:
+            result.inputFilename_ = "-"; // stdin
+            break;
+        case 1:
+            result.inputFilename_ = positionals.front();
+            break;
+        default:
+            result.errors_.push_back("bam2sam does not support more than one input file per run");
+            break;
+    }
+
+    // header options
+    if (options.is_set("no_header"))
+        result.noHeader_ = options.get("no_header");
+    if (options.is_set("header_only"))
+        result.printHeaderOnly_ = options.get("header_only");
+
+    // the two header flags are mutually exclusive
+    const bool conflictingHeaderFlags = result.noHeader_ && result.printHeaderOnly_;
+    if (conflictingHeaderFlags)
+        result.errors_.push_back("conflicting arguments requested: --no-header and --header-only");
+
+    return result;
+}
+
+/// bam2sam entry point.
+///
+/// Declares the command-line interface, converts the arguments into settings,
+/// reports any settings errors (followed by the help text), and otherwise runs
+/// the conversion.
+///
+/// \returns EXIT_SUCCESS on success; EXIT_FAILURE on invalid arguments or if
+///          the conversion throws a std::exception
+///
+int main(int argc, char* argv[])
+{
+    // setup help & options
+    optparse::OptionParser parser;
+    parser.description("bam2sam converts a BAM file to SAM. It is essentially a stripped-down "
+                       "'samtools view', mostly useful for testing/debugging without requiring samtools. "
+                       "Input BAM file is read from a file or stdin, and SAM output is written to stdout."
+                       );
+    parser.prog("bam2sam");
+    parser.usage("bam2sam [options] [input]");
+    parser.version(bam2sam::Version);
+    parser.add_version_option(true);
+    parser.add_help_option(true);
+
+    auto optionGroup = optparse::OptionGroup(parser, "Options");
+    // option registered with an empty name and metavar "input"; the value
+    // itself is read from the positional arguments in fromCommandLine()
+    optionGroup.add_option("")
+               .dest("input")
+               .metavar("input")
+               .help("Input BAM file. If not provided, stdin will be used as input.");
+    optionGroup.add_option("--no-header")
+               .dest("no_header")
+               .action("store_true")
+               .help("Omit header from output.");
+    optionGroup.add_option("--header-only")
+               .dest("header_only")
+               .action("store_true")
+               .help("Print only the header (no records).");
+    parser.add_option_group(optionGroup);
+
+    // parse command line for settings
+    const bam2sam::Settings settings = fromCommandLine(parser, argc, argv);
+    if (!settings.errors_.empty()) {
+        std::cerr << std::endl;
+        // iterate by reference: no need to copy each message
+        for (const auto& e : settings.errors_)
+            std::cerr << "ERROR: " << e << std::endl;
+        std::cerr << std::endl;
+        parser.print_help();
+        return EXIT_FAILURE;
+    }
+
+    // run tool
+    try {
+        bam2sam::PbBam2Sam::Run(settings);
+        return EXIT_SUCCESS;
+    }
+    catch (const std::exception& e) {
+        std::cerr << "ERROR: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef BAMFILEMERGER_H
+#define BAMFILEMERGER_H
+
+#include <pbbam/DataSet.h>
+#include <pbbam/PbiFilter.h>
+#include <pbbam/ProgramInfo.h>
+#include <string>
+#include <vector>
+
+namespace PacBio {
+namespace BAM {
+namespace common {
+
+class BamFileMerger
+{
+public:
+ /// \brief Runs merger on a dataset, applying any supplied filters.
+ ///
+ /// When this function exits, a merged BAM (and optional PBI) will have been
+ /// written and closed.
+ ///
+ /// \param[in] dataset provides input filenames & filters
+ /// \param[in] outputFilename resulting BAM output
+ /// \param[in] mergeProgram info about the calling program. Adds a @PG entry to merged header.
+ /// \param[in] createPbi if true, creates a PBI alongside output BAM
+ ///
+ /// \throws std::runtime_error if any errors are encountered while reading or writing
+ ///
+ static void Merge(const PacBio::BAM::DataSet& dataset,
+ const std::string& outputFilename,
+ const PacBio::BAM::ProgramInfo& mergeProgram = PacBio::BAM::ProgramInfo(),
+ bool createPbi = true);
+};
+
+} // namespace common
+} // namespace BAM
+} // namespace PacBio
+
+#include "BamFileMerger.inl"
+
+#endif // BAMFILEMERGER_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "BamFileMerger.h"
+
+#include <pbbam/BamHeader.h>
+#include <pbbam/BamReader.h>
+#include <pbbam/BamRecord.h>
+#include <pbbam/BamWriter.h>
+#include <pbbam/CompositeBamReader.h>
+#include <pbbam/PbiBuilder.h>
+
+#include <deque>
+#include <memory>
+#include <stdexcept>
+#include <cassert>
+
+namespace PacBio {
+namespace BAM {
+namespace common {
+
+// ICollator
+
+/// Base class for merging several BAM readers into one ordered record stream.
+///
+/// Holds one merge item (reader plus its pending record) per input that still
+/// has data. Subclasses supply the ordering by implementing UpdateSort(), which
+/// must leave the item to be emitted next at the front of mergeItems_.
+class ICollator
+{
+public:
+    // virtual: collators are owned and destroyed through ICollator pointers,
+    // so the derived destructor must be reachable from the base
+    virtual ~ICollator(void) { }
+
+    /// Fetches the next record in merged order.
+    ///
+    /// \param[out] record  receives the pending record of the front merge item
+    /// \returns false if no merge items remain, true otherwise
+    bool GetNext(BamRecord& record)
+    {
+        // nothing left to read
+        if (mergeItems_.empty())
+            return false;
+
+        // non-destructive 'pop' of first item from queue
+        auto firstIter = mergeItems_.begin();
+        auto firstItem = PacBio::BAM::internal::CompositeMergeItem{ std::move(firstIter->reader),
+                                                                    std::move(firstIter->record)
+                                                                  };
+        mergeItems_.pop_front();
+
+        // store its record in our output record
+        std::swap(record, firstItem.record);
+
+        // try fetch 'next' from first item's reader
+        // if successful, re-insert it into container & re-sort on our new values
+        // otherwise, this item will go out of scope & reader destroyed
+        if (firstItem.reader->GetNext(firstItem.record)) {
+            mergeItems_.push_front(std::move(firstItem));
+            UpdateSort();
+        }
+
+        // return success
+        return true;
+    }
+
+protected:
+    /// Inputs that still have a pending record, kept in emission order.
+    std::deque<PacBio::BAM::internal::CompositeMergeItem> mergeItems_;
+
+protected:
+    /// Takes ownership of \p readers and primes each with its first record;
+    /// readers that yield no record are dropped immediately.
+    ICollator(std::vector<std::unique_ptr<PacBio::BAM::BamReader> >&& readers)
+    {
+        for (auto&& reader : readers) {
+            auto item = internal::CompositeMergeItem{std::move(reader)};
+            if (item.reader->GetNext(item.record))
+                mergeItems_.push_back(std::move(item));
+        }
+    }
+
+    /// Re-establishes the subclass-specific order of mergeItems_.
+    virtual void UpdateSort(void) =0;
+};
+
+// QNameCollator
+
+/// Orders merge items by the identity of their pending record: movie name,
+/// then hole number, then (CCS records last) query start and query end.
+struct QNameSorter : std::binary_function<internal::CompositeMergeItem,
+                                          internal::CompositeMergeItem,
+                                          bool>
+{
+    /// \returns true if \p lhs should be emitted before \p rhs
+    bool operator()(const internal::CompositeMergeItem& lhs,
+                    const internal::CompositeMergeItem& rhs) const
+    {
+        const BamRecord& l = lhs.record;
+        const BamRecord& r = rhs.record;
+
+        // movie name
+        const int cmp = l.MovieName().compare(r.MovieName());
+        if (cmp != 0)
+            return cmp < 0;
+
+        // hole number
+        const auto lhsZmw = l.HoleNumber();
+        const auto rhsZmw = r.HoleNumber();
+        if (lhsZmw != rhsZmw)
+            return lhsZmw < rhsZmw;
+
+        // shuffle CCS reads after all others
+        const auto lhsReadType = l.Type();
+        const auto rhsReadType = r.Type();
+        if (lhsReadType == RecordType::CCS)
+            return false;
+        if (rhsReadType == RecordType::CCS)
+            return true;
+
+        // sort on qStart, then finally qEnd
+        const auto lhsQStart = l.QueryStart();
+        const auto rhsQStart = r.QueryStart();
+        if (lhsQStart != rhsQStart)
+            return lhsQStart < rhsQStart;
+
+        // equal starts: the shorter interval goes first, so the order of such
+        // records no longer depends on which input they came from
+        return l.QueryEnd() < r.QueryEnd();
+    }
+};
+
+/// Collator that keeps the merge items ordered with QNameSorter.
+class QNameCollator : public ICollator
+{
+public:
+    /// Takes over \p readers and puts the initial merge items in order.
+    QNameCollator(std::vector<std::unique_ptr<PacBio::BAM::BamReader>>&& readers)
+        : ICollator(std::move(readers))
+    {
+        UpdateSort();
+    }
+
+    /// Sorts all pending merge items with QNameSorter.
+    void UpdateSort(void)
+    {
+        auto byQueryName = QNameSorter{ };
+        std::sort(mergeItems_.begin(), mergeItems_.end(), byQueryName);
+    }
+};
+
+// AlignedCollator
+
+/// Collator that keeps the merge items ordered with PacBio::BAM::PositionSorter.
+class AlignedCollator : public ICollator
+{
+public:
+    /// Takes over \p readers and puts the initial merge items in order.
+    AlignedCollator(std::vector<std::unique_ptr<PacBio::BAM::BamReader>>&& readers)
+        : ICollator(std::move(readers))
+    {
+        UpdateSort();
+    }
+
+    /// Sorts all pending merge items with PositionSorter.
+    void UpdateSort(void)
+    {
+        auto byPosition = PacBio::BAM::PositionSorter{ };
+        std::sort(mergeItems_.begin(), mergeItems_.end(), byPosition);
+    }
+};
+
+// BamFileMerger
+
+/// Merges the BAM files of \p dataset into a single BAM at \p outputFilename.
+///
+/// All inputs must report the same sort order. "coordinate" selects the
+/// position-based collator; any other value selects the query-name collator.
+/// A PBI (outputFilename + ".pbi") is written alongside when \p createPbi is
+/// true and the output is not "-".
+///
+/// \throws std::runtime_error if the dataset has no BAM files, the output
+///         filename is empty, or the inputs' sort orders differ
+///
+inline
+void BamFileMerger::Merge(const DataSet& dataset,
+                          const std::string& outputFilename,
+                          const ProgramInfo& mergeProgram,
+                          bool createPbi)
+{
+    const PbiFilter filter = PbiFilter::FromDataSet(dataset);
+
+    // gather the filename of every BAM in the dataset
+    std::vector<std::string> inputPaths;
+    const auto& bamFiles = dataset.BamFiles();
+    inputPaths.reserve(bamFiles.size());
+    for (const auto& bamFile : bamFiles)
+        inputPaths.push_back(bamFile.Filename());
+
+    // validate arguments
+    if (inputPaths.empty())
+        throw std::runtime_error("no input filenames provided to BamFileMerger");
+    if (outputFilename.empty())
+        throw std::runtime_error("no output filename provide to BamFileMerger");
+
+    // open one reader per input; a non-empty filter needs the PBI-backed reader
+    const bool unfiltered = filter.IsEmpty();
+    std::vector<std::unique_ptr<BamReader> > readers;
+    readers.reserve(inputPaths.size());
+    for (const auto& path : inputPaths) {
+        if (unfiltered)
+            readers.emplace_back(new BamReader(path));
+        else
+            readers.emplace_back(new PbiIndexedBamReader(filter, path));
+    }
+
+    // snapshot each reader's header
+    std::vector<BamHeader> headers;
+    headers.reserve(readers.size());
+    for (auto&& reader : readers)
+        headers.push_back(reader->Header());
+
+    assert(!readers.empty());
+    assert(!headers.empty());
+
+    // fold all headers into the first one, insisting on a common sort order
+    BamHeader mergedHeader = headers.front();
+    const std::string sortOrder = mergedHeader.SortOrder();
+    const bool isCoordinateSorted = (sortOrder == "coordinate");
+    for (size_t i = 1; i < headers.size(); ++i) {
+        const BamHeader& header = headers.at(i);
+        if (header.SortOrder() != sortOrder)
+            throw std::runtime_error("BAM file sort orders do not match, aborting merge");
+        mergedHeader += header;
+    }
+    if (mergeProgram.IsValid())
+        mergedHeader.AddProgram(mergeProgram);
+
+    // pick the collator matching the sort order
+    // NOTE: 'readers' is moved-from after this point
+    std::unique_ptr<ICollator> collator;
+    if (isCoordinateSorted) {
+        collator.reset(new AlignedCollator(std::move(readers)));
+    } else {
+        collator.reset(new QNameCollator(std::move(readers)));
+    }
+
+    const bool writePbi = createPbi && (outputFilename != "-");
+    if (!writePbi) {
+
+        // plain merge: BAM only
+        BamWriter writer(outputFilename, mergedHeader);
+        BamRecord record;
+        while (collator->GetNext(record))
+            writer.Write(record);
+    }
+    else {
+
+        // merge while building the PBI on-the-fly
+        // TODO: this implementation recalculates all PBI values, when we really
+        //       only need to collate entries and update offsets
+        BamWriter writer(outputFilename, mergedHeader);
+        PbiBuilder builder{ (outputFilename + ".pbi"),
+                            mergedHeader.NumSequences(),
+                            isCoordinateSorted
+                          };
+        BamRecord record;
+        int64_t vOffset = 0;
+        while (collator->GetNext(record)) {
+            writer.Write(record, &vOffset);
+            builder.AddRecord(record, vOffset);
+        }
+    }
+}
+
+} // namespace common
+} // namespace BAM
+} // namespace PacBio
--- /dev/null
+/**
+ * Copyright (C) 2010 Johannes Weißl <jargon@molb.org>
+ * License: your favourite BSD-style license
+ *
+ * See OptionParser.h for help.
+ */
+
+#include "OptionParser.h"
+
+#include <cstdlib>
+#include <algorithm>
+#include <complex>
+#include <ciso646>
+
+#if defined(ENABLE_NLS) && ENABLE_NLS
+# include <libintl.h>
+# define _(s) gettext(s)
+#else
+# define _(s) ((const char *) (s))
+#endif
+
+using namespace std;
+
+namespace optparse {
+
+////////// auxiliary (string) functions { //////////
+// Function object that surrounds a string with a fixed left and right decoration.
+class str_wrap {
+public:
+    // distinct left/right decorations
+    str_wrap(const string& l, const string& r) : lwrap(l), rwrap(r) {}
+    // same decoration on both sides
+    str_wrap(const string& w) : lwrap(w), rwrap(w) {}
+    // returns lwrap + s + rwrap
+    string operator() (const string& s) {
+        string wrapped(lwrap);
+        wrapped += s;
+        wrapped += rwrap;
+        return wrapped;
+    }
+    const string lwrap, rwrap;
+};
+// Joins op(*it) for every element of [begin, end), placing sep between
+// consecutive results. An empty range yields an empty string.
+template<typename InputIterator, typename UnaryOperator>
+static string str_join_trans(const string& sep, InputIterator begin, InputIterator end, UnaryOperator op) {
+    string joined;
+    if (begin == end)
+        return joined;
+
+    // first element carries no separator; each later one is preceded by sep
+    InputIterator cur = begin;
+    joined += op(*cur);
+    for (++cur; cur != end; ++cur) {
+        joined += sep;
+        joined += op(*cur);
+    }
+    return joined;
+}
+// Joins the elements of [begin, end) with sep, leaving each element unchanged.
+template<class InputIterator>
+static string str_join(const string& sep, InputIterator begin, InputIterator end) {
+    // wrapping with empty decorations acts as the identity transform
+    const str_wrap identity("");
+    return str_join_trans(sep, begin, end, identity);
+}
+// Replaces, in place, every occurrence of patt in s with repl and returns s.
+// Text inserted by a replacement is never re-scanned. An empty patt leaves s
+// untouched.
+static string& str_replace(string& s, const string& patt, const string& repl) {
+    // an empty pattern matches at every position, so the scan below would
+    // never terminate
+    if (patt.empty())
+        return s;
+
+    const size_t n = patt.length();
+    size_t pos = s.find(patt);
+    while (pos != string::npos) {
+        s.replace(pos, n, repl);
+        // resume just past the inserted replacement
+        pos = s.find(patt, pos + repl.size());
+    }
+    return s;
+}
+// Non-mutating variant: returns a copy of s with every patt replaced by repl.
+static string str_replace(const string& s, const string& patt, const string& repl) {
+    string copy(s);
+    // 'copy' is a non-const lvalue, so this selects the in-place overload
+    str_replace(copy, patt, repl);
+    return copy;
+}
+static string str_format(const string& s, size_t pre, size_t len, bool indent_first = true) { // wraps s at whitespace into lines; 'pre' is the indent width, 'len' the wrap limit
+ stringstream ss;
+ string p; // prefix written before each emitted line
+ if (indent_first)
+ p = string(pre, ' ');
+ // pos: where the next whitespace search starts; linestart: start of the line being built; line: lines emitted so far
+ size_t pos = 0, linestart = 0;
+ size_t line = 0;
+ while (true) {
+ bool wrap = false; // set when an explicit '\n' forces a line break
+ // locate the next break candidate (space, newline or tab)
+ size_t new_pos = s.find_first_of(" \n\t", pos);
+ if (new_pos == string::npos)
+ break;
+ if (s[new_pos] == '\n') {
+ pos = new_pos + 1;
+ wrap = true;
+ }
+ if (line == 1) // once the first line is out, all later lines are indented regardless of indent_first
+ p = string(pre, ' ');
+ if (wrap || new_pos + pre > linestart + len) { // forced break, or this candidate lies beyond the wrap limit
+ ss << p << s.substr(linestart, pos - linestart - 1) << endl; // NOTE(review): if pos == linestart the count wraps around to npos - confirm intended
+ linestart = pos;
+ line++;
+ }
+ pos = new_pos + 1;
+ }
+ ss << p << s.substr(linestart) << endl; // whatever remains becomes the last line
+ return ss.str();
+}
+/// Interprets \p s as a decimal integer (an empty string counts as "0") and
+/// returns the next integer, again as a string. Used by the "count" action.
+static string str_inc(const string& s) {
+    long current;
+    istringstream(s.empty() ? string("0") : s) >> current;
+    ostringstream out;
+    out << (current + 1);
+    return out.str();
+}
+/// Width of the terminal in columns, used to wrap help text.
+///
+/// Defaults to 80. On non-Windows platforms the COLUMNS environment variable
+/// overrides the default, but only when it parses as a positive integer: an
+/// empty or malformed value would otherwise produce a width of 0, which the
+/// help layout arithmetic cannot handle.
+static unsigned int cols() {
+    const unsigned int fallback = 80;
+#ifndef _WIN32
+    const char *env = getenv("COLUMNS");
+    if (env) {
+        unsigned int parsed = 0;
+        if ((istringstream(env) >> parsed) && parsed > 0)
+            return parsed;
+    }
+#endif
+    return fallback;
+}
+/// Returns the last path component of \p s, ignoring trailing slashes.
+/// A string made only of slashes yields "/", an empty string yields "".
+static string basename(const string& s) {
+    const size_t last = s.find_last_not_of('/');
+    if (last == string::npos) {
+        // Nothing but slashes (or nothing at all).
+        return s.empty() ? string() : string(1, '/');
+    }
+    // Last slash in front of the final component, if any.
+    const size_t slash = s.find_last_of('/', last);
+    const size_t start = (slash == string::npos) ? 0 : slash + 1;
+    return s.substr(start, last + 1 - start);
+}
+////////// } auxiliary (string) functions //////////
+
+
+////////// class OptionParser { //////////
+OptionParser::OptionParser() : // defaults for a freshly created parser
+ _usage(_("%prog [options]")), // "%prog" is replaced by prog() when the usage line is printed
+ _add_help_option(true), // parse_args() registers -h/--help
+ _add_version_option(true), // parse_args() registers --version when version() is non-empty
+ _interspersed_args(true) {} // options may appear after positional arguments
+
+/// Registers an option known under a single name.
+Option& OptionParser::add_option(const string& opt) {
+    vector<string> names;
+    names.push_back(opt);
+    return add_option(names);
+}
+/// Registers an option known under two names (e.g. "-f" and "--file").
+Option& OptionParser::add_option(const string& opt1, const string& opt2) {
+    vector<string> names;
+    names.push_back(opt1);
+    names.push_back(opt2);
+    return add_option(names);
+}
+/// Registers an option known under three names.
+Option& OptionParser::add_option(const string& opt1, const string& opt2, const string& opt3) {
+    vector<string> names;
+    names.push_back(opt1);
+    names.push_back(opt2);
+    names.push_back(opt3);
+    return add_option(names);
+}
+/// Creates a new option, registers it under every name in \p v and returns it
+/// for further configuration.
+///
+/// Names starting with "--" are long options; empty names are skipped; for
+/// any other name the character at index 1 is taken as the short option.
+/// The destination defaults to the first long name (with '-' replaced by
+/// '_'), or to the first short name when there is no long one.
+Option& OptionParser::add_option(const vector<string>& v) {
+    _opts.push_back(Option());
+    Option& option = _opts.back();
+
+    string first_short;
+    for (vector<string>::const_iterator name = v.begin(); name != v.end(); ++name) {
+        if (name->empty())
+            continue;
+
+        if (name->compare(0, 2, "--") == 0) {
+            const string long_name = name->substr(2);
+            if (option.dest().empty())
+                option.dest(str_replace(long_name, "-", "_"));
+            option._long_opts.insert(long_name);
+            _optmap_l[long_name] = &option;
+            continue;
+        }
+
+        const string short_name = name->substr(1, 1);
+        if (first_short.empty())
+            first_short = short_name;
+        option._short_opts.insert(short_name);
+        _optmap_s[short_name] = &option;
+    }
+
+    if (option.dest().empty())
+        option.dest(first_short);
+    return option;
+}
+
+/// Makes every option of \p group known to this parser and remembers the
+/// group for help output.
+///
+/// Only pointers to the group and to its options are stored, so \p group has
+/// to stay alive for as long as the parser is used.
+OptionParser& OptionParser::add_option_group(const OptionGroup& group) {
+    typedef set<string>::const_iterator name_iterator;
+
+    for (list<Option>::const_iterator opt = group._opts.begin(); opt != group._opts.end(); ++opt) {
+        const Option* const target = &*opt;
+        for (name_iterator n = opt->_short_opts.begin(); n != opt->_short_opts.end(); ++n)
+            _optmap_s[*n] = target;
+        for (name_iterator n = opt->_long_opts.begin(); n != opt->_long_opts.end(); ++n)
+            _optmap_l[*n] = target;
+    }
+
+    _groups.push_back(&group);
+    return *this;
+}
+
+/// Returns the option registered for the short name \p opt; reports
+/// "no such option" through error() when there is none.
+const Option& OptionParser::lookup_short_opt(const string& opt) const {
+    const optMap::const_iterator found = _optmap_s.find(opt);
+    if (found == _optmap_s.end()) {
+        string msg(_("no such option"));
+        msg += ": -";
+        msg += opt;
+        error(msg);
+    }
+    return *(found->second);
+}
+
+/// Consumes the short option at the front of the pending argument list.
+///
+/// \param opt the option letter
+/// \param arg the complete argument as typed (e.g. "-ofile" or "-abc")
+///
+/// For an option taking a value, the value is the rest of \p arg or, if that
+/// is empty, the next pending argument. For a flag, any remaining characters
+/// are pushed back as a new "-..." argument so clustered flags are handled
+/// one at a time.
+void OptionParser::handle_short_opt(const string& opt, const string& arg) {
+    _remaining.pop_front();
+
+    const Option& option = lookup_short_opt(opt);
+    string value;
+    if (option._nargs != 1) {
+        if (arg.length() > 2)
+            _remaining.push_front("-" + arg.substr(2));
+    } else {
+        value = arg.substr(2);
+        if (value.empty()) {
+            if (_remaining.empty())
+                error("-" + opt + " " + _("option requires an argument"));
+            value = _remaining.front();
+            _remaining.pop_front();
+        }
+    }
+
+    process_opt(option, "-" + opt, value);
+}
+
+/// Resolves a long option name, accepting unambiguous abbreviations.
+///
+/// \param opt name as typed by the user, without the leading "--"
+/// \returns the matching option
+///
+/// An exact name always wins. Otherwise \p opt must be a prefix of exactly
+/// one registered long option; zero or several candidates are reported
+/// through error().
+const Option& OptionParser::lookup_long_opt(const string& opt) const {
+
+    // Check for an exact match first: without this, "--out" would be rejected
+    // as ambiguous whenever "--output" is registered as well.
+    const optMap::const_iterator exact = _optmap_l.find(opt);
+    if (exact != _optmap_l.end())
+        return *exact->second;
+
+    list<string> matching;
+    for (optMap::const_iterator it = _optmap_l.begin(); it != _optmap_l.end(); ++it) {
+        if (it->first.compare(0, opt.length(), opt) == 0)
+            matching.push_back(it->first);
+    }
+    if (matching.size() > 1) {
+        string x = str_join(", ", matching.begin(), matching.end());
+        error(_("ambiguous option") + string(": --") + opt + " (" + x + "?)");
+    }
+    if (matching.size() == 0)
+        error(_("no such option") + string(": --") + opt);
+
+    return *_optmap_l.find(matching.front())->second;
+}
+
+/// Consumes the long option at the front of the pending argument list.
+///
+/// \param optstr the argument without its leading "--", either "name" or
+///               "name=value"
+///
+/// When the option takes a value and none was attached with '=', the next
+/// pending argument is used. A missing or empty value is reported through
+/// error().
+void OptionParser::handle_long_opt(const string& optstr) {
+    _remaining.pop_front();
+
+    const size_t delim = optstr.find("=");
+    const bool has_inline_value = (delim != string::npos);
+    const string opt = optstr.substr(0, delim);
+    string value;
+    if (has_inline_value)
+        value = optstr.substr(delim + 1);
+
+    const Option& option = lookup_long_opt(opt);
+    const bool takes_value = (option._nargs == 1);
+
+    if (takes_value && !has_inline_value && !_remaining.empty()) {
+        value = _remaining.front();
+        _remaining.pop_front();
+    }
+    if (takes_value && value.empty())
+        error("--" + opt + " " + _("option requires an argument"));
+
+    process_opt(option, "--" + opt, value);
+}
+
+/// Parses a main()-style argument vector. argv[0] supplies the program name
+/// (reduced to its basename) unless one was set explicitly; the remaining
+/// entries are parsed as arguments.
+Values& OptionParser::parse_args(const int argc, char const* const* const argv) {
+    if (prog().empty())
+        prog(basename(argv[0]));
+    const vector<string> args(argv + 1, argv + argc);
+    return parse_args(args);
+}
+/// Parses the arguments in \p v and returns the resulting option values.
+///
+/// The built-in --version and -h/--help options are registered first (when
+/// enabled) and moved to the front of the option list. Arguments are then
+/// consumed one by one: "--" ends option processing, "--name" and "-x" are
+/// dispatched to the long/short handlers, anything else is a positional
+/// argument (which also ends option processing when interspersed arguments
+/// are disabled). Finally, defaults are filled in for every destination that
+/// has no value yet.
+Values& OptionParser::parse_args(const vector<string>& v) {
+
+    _remaining.assign(v.begin(), v.end());
+
+    if (add_version_option() && !version().empty()) {
+        add_option("--version") .action("version") .help(_("show program's version number and exit"));
+        // add_option() appends; move the new entry to the front.
+        _opts.splice(_opts.begin(), _opts, --(_opts.end()));
+    }
+    if (add_help_option()) {
+        add_option("-h", "--help") .action("help") .help(_("show this help message and exit"));
+        _opts.splice(_opts.begin(), _opts, --(_opts.end()));
+    }
+
+    bool options_done = false;
+    while (!options_done && !_remaining.empty()) {
+        // Copy: the handlers pop the front element while still using it.
+        const string arg = _remaining.front();
+
+        if (arg == "--") {
+            _remaining.pop_front();
+            options_done = true;
+        } else if (arg.compare(0, 2, "--") == 0) {
+            handle_long_opt(arg.substr(2));
+        } else if (arg.length() > 1 && arg[0] == '-') {
+            handle_short_opt(arg.substr(1, 1), arg);
+        } else {
+            _remaining.pop_front();
+            _leftover.push_back(arg);
+            if (!interspersed_args())
+                options_done = true;
+        }
+    }
+    // Whatever is still pending is positional.
+    _leftover.splice(_leftover.end(), _remaining);
+
+    for (strMap::const_iterator def = _defaults.begin(); def != _defaults.end(); ++def) {
+        if (!_values.is_set(def->first))
+            _values[def->first] = def->second;
+    }
+
+    for (list<Option>::const_iterator opt = _opts.begin(); opt != _opts.end(); ++opt) {
+        if (opt->get_default().empty() || _values.is_set(opt->dest()))
+            continue;
+        _values[opt->dest()] = opt->get_default();
+    }
+
+    return _values;
+}
+
+/// Carries out the action of option \p o.
+///
+/// \param o     the option that was recognised
+/// \param opt   the option as typed, including its dashes (for messages and
+///              callbacks)
+/// \param value the option's argument, empty for options without one
+///
+/// "help" and "version" print and terminate the process with status 0.
+/// "callback" invokes the registered callback, if any. All storing actions
+/// update the value for o.dest() and mark it as set by the user. Unknown
+/// actions are ignored.
+void OptionParser::process_opt(const Option& o, const string& opt, const string& value) {
+    const string& action = o.action();
+    const string& dest = o.dest();
+
+    if (action == "help") {
+        print_help();
+        std::exit(0);
+    }
+    if (action == "version") {
+        print_version();
+        std::exit(0);
+    }
+    if (action == "callback") {
+        if (o.callback())
+            (*o.callback())(o, opt, value, *this);
+        return;
+    }
+
+    if (action == "store" || action == "append") {
+        const string err = o.check_type(opt, value);
+        if (!err.empty())
+            error(err);
+        _values[dest] = value;
+        if (action == "append")
+            _values.all(dest).push_back(value);
+    } else if (action == "store_const" || action == "append_const") {
+        _values[dest] = o.get_const();
+        if (action == "append_const")
+            _values.all(dest).push_back(o.get_const());
+    } else if (action == "store_true") {
+        _values[dest] = "1";
+    } else if (action == "store_false") {
+        _values[dest] = "0";
+    } else if (action == "count") {
+        _values[dest] = str_inc(_values[dest]);
+    } else {
+        return;
+    }
+
+    _values.is_set_by_user(dest, true);
+}
+
+/// Concatenates the help entries of all options of this parser, each indented
+/// by \p indent spaces. Options whose help text is SUPPRESS_HELP are omitted.
+string OptionParser::format_option_help(unsigned int indent /* = 2 */) const {
+    string text;
+    for (list<Option>::const_iterator opt = _opts.begin(); opt != _opts.end(); ++opt) {
+        if (opt->help() == SUPPRESS_HELP)
+            continue;
+        text += opt->format_help(indent);
+    }
+    return text;
+}
+
+/// Builds the complete help text: usage line, description, the parser's own
+/// options, one section per option group, and the epilog. Usage, description,
+/// group descriptions and epilog are left out when suppressed or empty.
+string OptionParser::format_help() const {
+    ostringstream out;
+
+    if (usage() != SUPPRESS_USAGE)
+        out << get_usage() << '\n';
+
+    if (!description().empty())
+        out << str_format(description(), 0, cols()) << '\n';
+
+    out << _("Options") << ":" << '\n';
+    out << format_option_help();
+
+    typedef list<OptionGroup const*>::const_iterator group_iterator;
+    for (group_iterator g = _groups.begin(); g != _groups.end(); ++g) {
+        const OptionGroup* const group = *g;
+        out << '\n' << " " << group->title() << ":" << '\n';
+        if (!group->group_description().empty())
+            out << str_format(group->group_description(), 4, cols()) << '\n';
+        out << group->format_option_help(4);
+    }
+
+    if (!epilog().empty())
+        out << '\n' << str_format(epilog(), 0, cols());
+
+    return out.str();
+}
+/// Writes the help text to standard output.
+void OptionParser::print_help() const {
+    const string text = format_help();
+    cout << text;
+}
+
+/// Stores the usage text. A leading "usage: " (in any letter case) is
+/// dropped, since format_usage() adds its own prefix.
+void OptionParser::set_usage(const string& u) {
+    const string prefix("usage: ");
+    string head = u.substr(0, prefix.length());
+    transform(head.begin(), head.end(), head.begin(), ::tolower);
+    _usage = (head == prefix) ? u.substr(prefix.length()) : u;
+}
+/// Returns \p u prefixed with the (translated) word "Usage" and a colon, and
+/// terminated by a newline.
+string OptionParser::format_usage(const string& u) const {
+    string line(_("Usage"));
+    line += ": ";
+    line += u;
+    line += '\n';
+    return line;
+}
+/// Returns the formatted usage line with "%prog" replaced by the program
+/// name, or an empty string when usage output is suppressed.
+string OptionParser::get_usage() const {
+    if (usage() != SUPPRESS_USAGE) {
+        const string expanded = str_replace(usage(), "%prog", prog());
+        return format_usage(expanded);
+    }
+    return string();
+}
+/// Writes the usage line, followed by an empty line, to \p out. Nothing is
+/// written when the usage text is empty (i.e. suppressed).
+void OptionParser::print_usage(ostream& out) const {
+    const string text = get_usage();
+    if (text.empty())
+        return;
+    out << text << endl;
+}
+/// Writes the usage line to standard output.
+void OptionParser::print_usage() const {
+    ostream& out = cout;
+    print_usage(out);
+}
+
+/// Returns the version text with "%prog" replaced by the program name.
+string OptionParser::get_version() const {
+    string text(_version);
+    str_replace(text, "%prog", prog());
+    return text;
+}
+/// Writes the version text and a newline to \p out.
+void OptionParser::print_version(ostream& out) const {
+    const string text = get_version();
+    out << text << endl;
+}
+/// Writes the version text to standard output.
+void OptionParser::print_version() const {
+    ostream& out = cout;
+    print_version(out);
+}
+
+void OptionParser::exit() const { // terminates the process; called by error()
+ std::exit(2); // status 2 is the exit code used for command-line errors here
+}
+void OptionParser::error(const string& msg) const { // reports a command-line error and terminates
+ print_usage(cerr); // usage line first (nothing is printed when usage is suppressed)
+ cerr << prog() << ": " << _("error") << ": " << msg << endl; // "<prog>: error: <msg>"
+ exit(); // member exit(): ends the process with status 2
+}
+////////// } class OptionParser //////////
+
+////////// class Values { //////////
+/// Read-only lookup: returns the value stored for destination \p d, or a
+/// reference to an empty string when nothing is stored. Never inserts.
+const string& Values::operator[] (const string& d) const {
+    static const string empty = "";
+    const strMap::const_iterator pos = _map.find(d);
+    if (pos == _map.end())
+        return empty;
+    return pos->second;
+}
+/// Records (\p yes == true) or forgets (\p yes == false) that destination
+/// \p d was given explicitly on the command line.
+void Values::is_set_by_user(const string& d, bool yes) {
+    if (!yes) {
+        _userSet.erase(d);
+        return;
+    }
+    _userSet.insert(d);
+}
+////////// } class Values //////////
+
+////////// class Option { //////////
+/// Checks that \p val is acceptable for this option's type.
+///
+/// \param opt the option as typed, used in the message
+/// \param val the argument to validate
+/// \returns an empty string when \p val is fine, otherwise an error message
+///
+/// "int"/"long", "float"/"double" and "complex" values must be extractable
+/// from the text with the stream operators; "choice" values must be one of
+/// choices(). Every other type is accepted as is.
+string Option::check_type(const string& opt, const string& val) const {
+    istringstream ss(val);
+    const string& kind = type();
+
+    bool failed = false;
+    string problem;   // what is wrong with the value
+    string hint;      // extra text appended after the quoted value
+
+    if (kind == "int" || kind == "long") {
+        long parsed;
+        if (!(ss >> parsed)) {
+            failed = true;
+            problem = _("invalid integer value");
+        }
+    } else if (kind == "float" || kind == "double") {
+        double parsed;
+        if (!(ss >> parsed)) {
+            failed = true;
+            problem = _("invalid floating-point value");
+        }
+    } else if (kind == "choice") {
+        if (find(choices().begin(), choices().end(), val) == choices().end()) {
+            failed = true;
+            problem = _("invalid choice");
+            hint = string(" (") + _("choose from") + " "
+                 + str_join_trans(", ", choices().begin(), choices().end(), str_wrap("'"))
+                 + ")";
+        }
+    } else if (kind == "complex") {
+        complex<double> parsed;
+        if (!(ss >> parsed)) {
+            failed = true;
+            problem = _("invalid complex value");
+        }
+    }
+
+    if (!failed)
+        return string();
+
+    ostringstream err;
+    err << _("option") << " " << opt << ": " << problem << ": '" << val << "'" << hint;
+    return err.str();
+}
+
+/// Builds the left-hand column of a help entry: the indentation followed by
+/// all short and long names of the option, separated by ", ".
+///
+/// For options taking an argument each name is followed by a placeholder
+/// (" X" after short names, "=X" after long names), where X is metavar() or,
+/// if that is empty, the upper-cased type name. An option without any name
+/// is shown by its metavar alone.
+string Option::format_option_help(unsigned int indent /* = 2 */) const {
+
+    string short_suffix, long_suffix;
+    if (nargs() == 1) {
+        string placeholder = metavar();
+        if (placeholder.empty()) {
+            placeholder = type();
+            transform(placeholder.begin(), placeholder.end(), placeholder.begin(), ::toupper);
+        }
+        short_suffix = " " + placeholder;
+        long_suffix = "=" + placeholder;
+    }
+
+    const bool has_short = !_short_opts.empty();
+    const bool has_long = !_long_opts.empty();
+
+    string line(indent, ' ');
+    if (has_short)
+        line += str_join_trans(", ", _short_opts.begin(), _short_opts.end(), str_wrap("-", short_suffix));
+    if (has_short && has_long)
+        line += ", ";
+    if (has_long)
+        line += str_join_trans(", ", _long_opts.begin(), _long_opts.end(), str_wrap("--", long_suffix));
+    if (!has_short && !has_long)
+        line += metavar();
+
+    return line;
+}
+
+/// Builds the complete help entry for this option: the option names in the
+/// left column and the wrapped help text in the right column.
+///
+/// \param indent number of spaces in front of the option names
+/// \returns the entry, ending with a newline
+///
+/// The left column is 30% of the terminal width, at most 36 columns. When the
+/// option names do not fit, the help text starts on a new line. "%default"
+/// in the help text is replaced by the option's default, if it has one.
+string Option::format_help(unsigned int indent /* = 2 */) const {
+    stringstream ss;
+    const string h = format_option_help(indent);
+    const unsigned int width = cols();
+    const unsigned int opt_width = min(width*3/10, 36u);
+    bool indent_first = false;
+    ss << h;
+    // If the option list is too long, start a new paragraph. The test is
+    // written as "length + 1 >= opt_width" rather than
+    // "length >= opt_width - 1" so that opt_width == 0 (very narrow terminal)
+    // cannot underflow and fall through to the padding branch below.
+    if (h.length() + 1 >= opt_width) {
+        ss << endl;
+        indent_first = true;
+    } else {
+        // Safe: here h.length() + 1 < opt_width, so the padding is positive.
+        ss << string(opt_width - h.length(), ' ');
+        if (help() == "")
+            ss << endl;
+    }
+    if (help() != "") {
+        string help_str = (get_default() != "") ? str_replace(help(), "%default", get_default()) : help();
+        ss << str_format(help_str, opt_width, width, indent_first);
+    }
+    return ss.str();
+}
+
+/// Sets the action carried out when the option is seen. Actions that do not
+/// consume an argument also set nargs to 0.
+Option& Option::action(const string& a) {
+    static const char* const no_argument_actions[] = {
+        "store_const", "store_true", "store_false",
+        "append_const", "count", "help", "version"
+    };
+    const size_t count = sizeof(no_argument_actions) / sizeof(no_argument_actions[0]);
+
+    _action = a;
+    for (size_t i = 0; i < count; ++i) {
+        if (a == no_argument_actions[i]) {
+            nargs(0);
+            break;
+        }
+    }
+    return *this;
+}
+////////// } class Option //////////
+
+}
--- /dev/null
+/**
+ * Copyright (C) 2010 Johannes Weißl <jargon@molb.org>
+ * License: your favourite BSD-style license
+ *
+ * git clone http://github.com/weisslj/cpp-optparse.git
+ *
+ * This is yet another option parser for C++. It is modelled after the
+ * excellent Python optparse API. Although incomplete, anyone familiar with
+ * optparse should feel at home:
+ * http://docs.python.org/library/optparse.html
+ *
+ * Design decisions:
+ * - elegant and easy usage more important than speed / flexibility
+ * - shortness more important than feature completeness
+ * * no unicode
+ * * no checking for user programming errors
+ *
+ * Why not use getopt/getopt_long?
+ * - not C++ / not completely POSIX
+ * - too cumbersome to use, would need lot of additional code
+ *
+ * Why not use Boost.Program_options?
+ * - boost not installed on all target platforms (esp. cluster, HPC, ...)
+ * - too big to include just for option handling:
+ * 322 *.h (44750 lines) + 7 *.cpp (2078 lines)
+ *
+ * Why not use tclap/Opag/Options/CmdLine/Anyoption/Argument_helper/...?
+ * - no reason, writing one is faster than code inspection :-)
+ * - similarity to Python desired for faster learning curve
+ *
+ * Future work:
+ * - nargs > 1?
+ * - comments?
+ *
+ * Python only features:
+ * - conflict handlers
+ * - adding new actions
+ *
+ *
+ * Example:
+ *
+ * using optparse::OptionParser;
+ *
+ * OptionParser parser = OptionParser() .description("just an example");
+ *
+ * parser.add_option("-f", "--file") .dest("filename")
+ * .help("write report to FILE") .metavar("FILE");
+ * parser.add_option("-q", "--quiet")
+ * .action("store_false") .dest("verbose") .set_default("1")
+ * .help("don't print status messages to stdout");
+ *
+ * optparse::Values options = parser.parse_args(argc, argv);
+ * vector<string> args = parser.args();
+ *
+ * if (options.get("verbose"))
+ * cout << options["filename"] << endl;
+ *
+ */
+
+#ifndef OPTIONPARSER_H_
+#define OPTIONPARSER_H_
+
+#include <iostream>
+#include <list>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace optparse {
+
+class OptionParser;
+class OptionGroup;
+class Option;
+class Values;
+class Value;
+class Callback;
+
+typedef std::map<std::string,std::string> strMap;
+typedef std::map<std::string,std::list<std::string> > lstMap;
+typedef std::map<std::string,Option const*> optMap;
+
+const char* const SUPPRESS_HELP = "SUPPRESS" "HELP";
+const char* const SUPPRESS_USAGE = "SUPPRESS" "USAGE";
+
+//! Holds an option value as text and converts it on demand.
+//!
+//! Each conversion operator parses the stored text with the stream
+//! extraction operator of the target type. An unset value, or text that
+//! cannot be parsed, converts to false / 0.
+class Value {
+ public:
+    Value() : str(), valid(false) {}
+    Value(const std::string& v) : str(v), valid(true) {}
+
+    operator const char*() { return str.c_str(); }
+    operator bool() { return convert<bool>(); }
+    operator short() { return convert<short>(); }
+    operator unsigned short() { return convert<unsigned short>(); }
+    operator int() { return convert<int>(); }
+    operator unsigned int() { return convert<unsigned int>(); }
+    operator long() { return convert<long>(); }
+    operator unsigned long() { return convert<unsigned long>(); }
+    operator float() { return convert<float>(); }
+    operator double() { return convert<double>(); }
+    operator long double() { return convert<long double>(); }
+
+ private:
+    //! Parses the stored text as a T; yields T() when unset or unparsable.
+    template<typename T>
+    T convert() {
+        T parsed;
+        if (valid && (std::istringstream(str) >> parsed))
+            return parsed;
+        return T();
+    }
+
+    const std::string str;
+    bool valid;
+};
+
+//! Result of parsing: maps each option destination to its value.
+//!
+//! Every destination has a single "current" value (operator[] / get()).
+//! Destinations filled by the append actions additionally keep the list of
+//! all values given (all()). is_set_by_user() tells whether a destination
+//! was given on the command line, as opposed to filled from a default.
+class Values {
+ public:
+    Values() : _map() {}
+    //! Read-only access; yields an empty string for unknown destinations.
+    const std::string& operator[] (const std::string& d) const;
+    //! Read-write access; creates an empty entry for unknown destinations.
+    std::string& operator[] (const std::string& d) { return _map[d]; }
+    bool is_set(const std::string& d) const { return _map.find(d) != _map.end(); }
+    bool is_set_by_user(const std::string& d) const { return _userSet.find(d) != _userSet.end(); }
+    void is_set_by_user(const std::string& d, bool yes);
+    //! Convertible view of the value; an unset destination converts to false / 0.
+    Value get(const std::string& d) const { return (is_set(d)) ? Value((*this)[d]) : Value(); }
+
+    typedef std::list<std::string>::iterator iterator;
+    typedef std::list<std::string>::const_iterator const_iterator;
+    //! All values appended to \p d; creates an empty list when there is none.
+    std::list<std::string>& all(const std::string& d) { return _appendMap[d]; }
+    //! All values appended to \p d. Returns an empty list for a destination
+    //! nothing was appended to (looking it up must not dereference end()).
+    const std::list<std::string>& all(const std::string& d) const {
+        static const std::list<std::string> empty;
+        const lstMap::const_iterator it = _appendMap.find(d);
+        return (it != _appendMap.end()) ? it->second : empty;
+    }
+
+ private:
+    strMap _map;
+    lstMap _appendMap;
+    std::set<std::string> _userSet;
+};
+
+class OptionParser { // command-line parser: register options with add_option(), then call parse_args()
+ public:
+ OptionParser();
+ virtual ~OptionParser() {}
+
+ OptionParser& usage(const std::string& u) { set_usage(u); return *this; } // setters return *this so calls can be chained
+ OptionParser& version(const std::string& v) { _version = v; return *this; }
+ OptionParser& description(const std::string& d) { _description = d; return *this; }
+ OptionParser& add_help_option(bool h) { _add_help_option = h; return *this; }
+ OptionParser& add_version_option(bool v) { _add_version_option = v; return *this; }
+ OptionParser& prog(const std::string& p) { _prog = p; return *this; }
+ OptionParser& epilog(const std::string& e) { _epilog = e; return *this; }
+ OptionParser& set_defaults(const std::string& dest, const std::string& val) { // default for a destination, applied by parse_args() when no value was given
+ _defaults[dest] = val; return *this;
+ }
+ OptionParser& enable_interspersed_args() { _interspersed_args = true; return *this; }
+ OptionParser& disable_interspersed_args() { _interspersed_args = false; return *this; } // option processing stops at the first positional argument
+ OptionParser& add_option_group(const OptionGroup& group); // stores a pointer: the group must outlive the parser
+
+ const std::string& usage() const { return _usage; }
+ const std::string& version() const { return _version; }
+ const std::string& description() const { return _description; }
+ bool add_help_option() const { return _add_help_option; }
+ bool add_version_option() const { return _add_version_option; }
+ const std::string& prog() const { return _prog; }
+ const std::string& epilog() const { return _epilog; }
+ bool interspersed_args() const { return _interspersed_args; }
+
+ Option& add_option(const std::string& opt); // the returned Option is configured through its chainable setters
+ Option& add_option(const std::string& opt1, const std::string& opt2);
+ Option& add_option(const std::string& opt1, const std::string& opt2, const std::string& opt3);
+ Option& add_option(const std::vector<std::string>& opt);
+
+ Values& parse_args(int argc, char const* const* argv); // argv[0] provides prog() when none was set
+ Values& parse_args(const std::vector<std::string>& args); // args must not contain the program name
+ template<typename InputIterator>
+ Values& parse_args(InputIterator begin, InputIterator end) {
+ return parse_args(std::vector<std::string>(begin, end));
+ }
+
+ const std::list<std::string>& args() const { return _leftover; } // arguments that were not consumed as options
+ std::vector<std::string> args() { // same arguments, copied into a vector
+ return std::vector<std::string>(_leftover.begin(), _leftover.end());
+ }
+
+ std::string format_help() const;
+ std::string format_option_help(unsigned int indent = 2) const;
+ void print_help() const;
+
+ void set_usage(const std::string& u);
+ std::string get_usage() const;
+ void print_usage(std::ostream& out) const;
+ void print_usage() const;
+
+ std::string get_version() const;
+ void print_version(std::ostream& out) const;
+ void print_version() const;
+
+ void error(const std::string& msg) const; // prints usage and message to stderr, then calls exit()
+ void exit() const; // terminates the process with status 2
+
+ private:
+ const Option& lookup_short_opt(const std::string& opt) const;
+ const Option& lookup_long_opt(const std::string& opt) const;
+
+ void handle_short_opt(const std::string& opt, const std::string& arg);
+ void handle_long_opt(const std::string& optstr);
+
+ void process_opt(const Option& option, const std::string& opt, const std::string& value);
+
+ std::string format_usage(const std::string& u) const;
+
+ std::string _usage;
+ std::string _version;
+ std::string _description;
+ bool _add_help_option;
+ bool _add_version_option;
+ std::string _prog;
+ std::string _epilog;
+ bool _interspersed_args;
+
+ Values _values; // result returned by parse_args()
+
+ std::list<Option> _opts; // options owned by this parser, in help order
+ optMap _optmap_s; // short name -> option
+ optMap _optmap_l; // long name -> option
+ strMap _defaults; // destination -> default set through set_defaults()
+ std::list<OptionGroup const*> _groups; // non-owning
+
+ std::list<std::string> _remaining; // arguments still to be processed during parse_args()
+ std::list<std::string> _leftover; // positional arguments collected by parse_args()
+};
+
+class OptionGroup : public OptionParser { // titled set of options, printed as its own section of the parser's help
+ public:
+ OptionGroup(const OptionParser& p, const std::string& t, const std::string& d = "") : // p: parser the group is created for, t: title, d: optional description
+ _parser(p), _title(t), _group_description(d) {}
+ virtual ~OptionGroup() {}
+
+ OptionGroup& title(const std::string& t) { _title = t; return *this; }
+ OptionGroup& group_description(const std::string& d) { _group_description = d; return *this; }
+ const std::string& title() const { return _title; }
+ const std::string& group_description() const { return _group_description; }
+
+ private:
+ const OptionParser& _parser; // reference to the parser passed to the constructor
+ std::string _title;
+ std::string _group_description;
+};
+
+class Option { // one command-line option; configured through chainable setters
+ public:
+ Option() : _action("store"), _type("string"), _nargs(1), _callback(0) {} // default: store one string argument, no callback
+ virtual ~Option() {}
+
+ Option& action(const std::string& a); // also sets nargs to 0 for actions that take no argument
+ Option& type(const std::string& t) { _type = t; return *this; }
+ Option& dest(const std::string& d) { _dest = d; return *this; } // key under which the value is stored in Values
+ Option& set_default(const std::string& d) { _default = d; return *this; }
+ template<typename T>
+ Option& set_default(T t) { std::ostringstream ss; ss << t; _default = ss.str(); return *this; } // any streamable value; stored as text
+ Option& nargs(size_t n) { _nargs = n; return *this; }
+ Option& set_const(const std::string& c) { _const = c; return *this; } // value used by the store_const / append_const actions
+ template<typename InputIterator>
+ Option& choices(InputIterator begin, InputIterator end) { // also switches the type to "choice"
+ _choices.assign(begin, end); type("choice"); return *this;
+ }
+ template<typename InputEnumerable>
+ Option& choices(InputEnumerable enumerable) { // same, for any container with begin()/end()
+ _choices.assign(enumerable.begin(), enumerable.end()); type("choice"); return *this;
+ }
+ Option& help(const std::string& h) { _help = h; return *this; }
+ Option& metavar(const std::string& m) { _metavar = m; return *this; } // placeholder shown for the argument in help output
+ Option& callback(Callback& c) { _callback = &c; return *this; } // stores the address of c, not a copy
+
+ const std::string& action() const { return _action; }
+ const std::string& type() const { return _type; }
+ const std::string& dest() const { return _dest; }
+ const std::string& get_default() const { return _default; }
+ size_t nargs() const { return _nargs; }
+ const std::string& get_const() const { return _const; }
+ const std::list<std::string>& choices() const { return _choices; }
+ const std::string& help() const { return _help; }
+ const std::string& metavar() const { return _metavar; }
+ Callback* callback() const { return _callback; }
+
+ private:
+ std::string check_type(const std::string& opt, const std::string& val) const; // returns an error message, empty when val is valid
+ std::string format_option_help(unsigned int indent = 2) const; // left column of the help entry (option names)
+ std::string format_help(unsigned int indent = 2) const; // complete help entry
+
+ std::set<std::string> _short_opts; // names without the leading "-"
+ std::set<std::string> _long_opts; // names without the leading "--"
+
+ std::string _action;
+ std::string _type;
+ std::string _dest;
+ std::string _default;
+ size_t _nargs;
+ std::string _const;
+ std::list<std::string> _choices;
+ std::string _help;
+ std::string _metavar;
+ Callback* _callback; // not owned; null when no callback is set
+
+ friend class OptionParser; // the parser fills the name sets and calls the private helpers
+};
+
+class Callback { // interface for user code run by options whose action is "callback"
+public:
+ virtual void operator() (const Option& option, const std::string& opt, const std::string& val, const OptionParser& parser) = 0; // option: the option seen, opt: as typed (with dashes), val: its argument, parser: the calling parser
+ virtual ~Callback() {}
+};
+
+}
+
+#endif
--- /dev/null
+
+set(PbindexSrcDir ${PacBioBAM_ToolsDir}/pbindex/src)
+
+# create version header: pbindex reports the same version as the PacBioBAM library
+set(PbIndex_VERSION ${PacBioBAM_VERSION})
+configure_file(
+ ${PbindexSrcDir}/PbIndexVersion.h.in ${GeneratedDir}/PbIndexVersion.h @ONLY
+)
+
+# list source files (the option parser is shared tool code from ToolsCommonDir)
+set(PBINDEX_SOURCES
+ ${ToolsCommonDir}/OptionParser.cpp
+ ${PbindexSrcDir}/main.cpp
+ ${PbindexSrcDir}/PbIndex.cpp
+)
+
+# build pbindex executable (create_pbbam_tool is presumably defined by the PbbamTool module included here)
+include(PbbamTool)
+create_pbbam_tool(
+ TARGET pbindex
+ SOURCES ${PBINDEX_SOURCES}
+)
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "PbIndex.h"
+#include <pbbam/BamFile.h>
+#include <pbbam/PbiRawData.h>
+#include <cassert>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+using namespace pbindex;
+using namespace std;
+
+// Default settings: index contents are not printed.
+Settings::Settings()
+    : printPbiContents_(false)
+{
+}
+
+// Creates the PacBio index for the BAM file named in settings.inputBamFilename_.
+//
+// Returns EXIT_SUCCESS when the index was created. Any standard exception
+// raised while opening the file or building the index is reported on stderr
+// as "pbindex ERROR: <what>" and turned into EXIT_FAILURE.
+int PbIndex::Create(const Settings& settings)
+{
+    try
+    {
+        PacBio::BAM::BamFile bamFile(settings.inputBamFilename_);
+        bamFile.CreatePacBioIndex();
+        return EXIT_SUCCESS;
+    }
+    // Catch the common base class, by const reference: failures that are not
+    // std::runtime_error would otherwise escape and terminate the tool
+    // without the error message.
+    catch (const std::exception& e)
+    {
+        cerr << "pbindex ERROR: " << e.what() << endl;
+        return EXIT_FAILURE;
+    }
+}
+
+//int PbIndex::Print(const Settings& settings)
+//{
+
+//}
+
+// Entry point of the tool. Printing the contents of an existing index
+// (settings.printPbiContents_) is not implemented yet, so every run creates
+// the index.
+int PbIndex::Run(const Settings& settings)
+{
+    const int status = Create(settings);
+    return status;
+}
+
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef PBINDEX_H
+#define PBINDEX_H
+
+#include <string>
+#include <vector>
+
+namespace pbindex {
+
+/// Options controlling a single pbindex run.
+class Settings
+{
+public:
+    Settings();
+
+    std::string inputBamFilename_;       ///< BAM file to create the index for
+    bool printPbiContents_;              ///< set to false by the constructor
+    std::vector<std::string> errors_;    ///< presumably problems found while reading the command line -- confirm against main.cpp
+};
+
+class PbIndex // static entry points of the pbindex tool
+{
+public:
+ static int Run(const Settings& settings); // runs the tool; returns the process exit code
+private:
+ static int Create(const Settings& settings); // creates the index for settings.inputBamFilename_
+// static int Print(const Settings& settings);
+};
+
+} // namespace pbindex
+
+#endif // PBINDEX_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef PBINDEXVERSION_H
+#define PBINDEXVERSION_H
+
+#include <string>
+
+namespace pbindex {
+
+// Version string of the pbindex tool.
+// NOTE(review): the @PbIndex_VERSION@ token is presumably substituted when
+// this template header is configured by the build system - confirm.
+const std::string Version("@PbIndex_VERSION@");
+
+} // namespace pbindex
+
+#endif // PBINDEXVERSION_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "../common/OptionParser.h"
+#include "PbIndex.h"
+#include "PbIndexVersion.h"
+#include <cassert>
+#include <iostream>
+using namespace std;
+
+// Builds the tool settings from the command line.
+//
+// Exactly one positional argument (the input BAM filename) is accepted.
+// Zero or several positional arguments do not abort here: a message is
+// appended to Settings::errors_ and the caller decides how to report it.
+static
+pbindex::Settings fromCommandLine(optparse::OptionParser& parser,
+                                  int argc, char* argv[])
+{
+    // the parse result itself is not consulted; only the leftover
+    // positional arguments are inspected below
+    const optparse::Values options = parser.parse_args(argc, argv);
+    (void)options;
+
+    pbindex::Settings settings;
+
+    // get input filename
+    const vector<string> positionalArgs = parser.args();
+    switch (positionalArgs.size()) {
+        case 0 :
+            settings.errors_.push_back("pbindex requires an input BAM filename");
+            break;
+        case 1 :
+            settings.inputBamFilename_ = positionalArgs.front();
+            break;
+        default :
+            settings.errors_.push_back("pbindex does not support more than one input file per run");
+            break;
+    }
+
+    return settings;
+}
+
+// pbindex entry point: configures the option parser, derives the settings
+// from the command line and hands them to PbIndex::Run.
+//
+// Returns EXIT_FAILURE (after printing the collected errors followed by the
+// help text) when command-line validation fails; otherwise returns whatever
+// PbIndex::Run returns.
+int main(int argc, char* argv[])
+{
+    // setup help & options
+    optparse::OptionParser parser;
+    parser.description("pbindex creates a index file that enables random-access to PacBio-specific data in BAM files. "
+                       "Generated index filename will be the same as input BAM plus .pbi suffix."
+                       );
+    parser.prog("pbindex");
+    parser.usage("pbindex <input>");
+    parser.version(pbindex::Version);
+    parser.add_version_option(true);
+    parser.add_help_option(true);
+
+    auto ioGroup = optparse::OptionGroup(parser, "Input/Output");
+    ioGroup.add_option("")
+           .dest("input")
+           .metavar("input")
+           .help("Input BAM file");
+    parser.add_option_group(ioGroup);
+
+    // parse command line for settings
+    const pbindex::Settings settings = fromCommandLine(parser, argc, argv);
+    if (!settings.errors_.empty()) {
+        cerr << endl;
+        // iterate by reference: the messages are only printed, so there is
+        // no reason to copy each std::string
+        for (const auto& e : settings.errors_)
+            cerr << "ERROR: " << e << endl;
+        cerr << endl;
+        parser.print_help();
+        return EXIT_FAILURE;
+    }
+
+    // run tool
+    return pbindex::PbIndex::Run(settings);
+}
--- /dev/null
+
+# Build script for the pbindexdump command-line tool:
+# generates its version header, builds the executable and, when tests are
+# enabled, registers its cram tests.
+
+# location of the tool's sources, relative to the shared tools directory
+set(PbindexdumpSrcDir ${PacBioBAM_ToolsDir}/pbindexdump/src)
+
+# create version header
+# (the tool reuses the library version; @ONLY limits substitution to
+#  @VAR@-style references in the template)
+set(PbIndexDump_VERSION ${PacBioBAM_VERSION})
+configure_file(
+ ${PbindexdumpSrcDir}/PbIndexDumpVersion.h.in ${GeneratedDir}/PbIndexDumpVersion.h @ONLY
+)
+
+# list source files
+# (the option parser is shared tool code; the rest belongs to pbindexdump)
+set(PBINDEXDUMP_SOURCES
+ ${ToolsCommonDir}/OptionParser.cpp
+ ${PbindexdumpSrcDir}/CppFormatter.cpp
+ ${PbindexdumpSrcDir}/JsonFormatter.cpp
+ ${PbindexdumpSrcDir}/PbIndexDump.cpp
+ ${PbindexdumpSrcDir}/main.cpp
+)
+
+# build pbindexdump executable
+# (create_pbbam_tool is expected to come from the included PbbamTool module)
+include(PbbamTool)
+create_pbbam_tool(
+ TARGET pbindexdump
+ SOURCES ${PBINDEXDUMP_SOURCES}
+)
+
+# cram tests
+# (only configured and registered when the project builds its tests)
+if (PacBioBAM_build_tests)
+
+ # generate the JSON-output test script from its template
+ configure_file(
+ ${PacBioBAM_CramTestsDir}/pbindexdump_json.t.in
+ ${GeneratedDir}/pbindexdump_json.t
+ )
+
+ # generate the C++-output test script from its template
+ configure_file(
+ ${PacBioBAM_CramTestsDir}/pbindexdump_cpp.t.in
+ ${GeneratedDir}/pbindexdump_cpp.t
+ )
+
+ # run both generated scripts through cram.py from the test scripts directory
+ add_test(
+ NAME pbindexdump_CramTests
+ WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
+ COMMAND "python" cram.py
+ ${GeneratedDir}/pbindexdump_json.t
+ ${GeneratedDir}/pbindexdump_cpp.t
+ )
+
+endif()
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "CppFormatter.h"
+#include <pbbam/PbiRawData.h>
+
+#include <iostream>
+#include <sstream>
+
+using namespace pbindexdump;
+using namespace std;
+
+namespace pbindexdump {
+
+// Renders every reference entry as one "PbiReferenceEntry{tId,beginRow,endRow}"
+// line. Lines are joined with ",\n"; when at least one entry exists the text
+// ends with a single "\n". Returns an empty string when there are no entries.
+static
+string printCppReferenceData(const PacBio::BAM::PbiRawReferenceData& referenceData)
+{
+    string text;
+    for (const PacBio::BAM::PbiReferenceEntry& entry : referenceData.entries_) {
+        // separator goes in front of every entry except the first
+        if (!text.empty())
+            text += ",\n";
+        text += " PbiReferenceEntry{";
+        text += to_string(entry.tId_);
+        text += ",";
+        text += to_string(entry.beginRow_);
+        text += ",";
+        text += to_string(entry.endRow_);
+        text += "}";
+    }
+    if (!text.empty())
+        text += "\n";
+    return text;
+}
+
+// Joins the elements of a vector into a comma-separated list (no spaces,
+// no trailing comma). An empty vector yields an empty string.
+template<typename T>
+string printVectorElements(const std::vector<T>& c)
+{
+    stringstream out;
+    bool needSeparator = false;
+    for (const auto& value : c) {
+        if (needSeparator)
+            out << ",";
+        out << value;
+        needSeparator = true;
+    }
+    return out.str();
+}
+
+// uint8_t specialization: widens each element to uint16_t before streaming
+// so it is printed as a number rather than as a character.
+template<>
+string printVectorElements(const std::vector<uint8_t>& c)
+{
+    stringstream out;
+    bool needSeparator = false;
+    for (const auto& value : c) {
+        if (needSeparator)
+            out << ",";
+        out << static_cast<uint16_t>(value);
+        needSeparator = true;
+    }
+    return out.str();
+}
+
+// int8_t specialization: widens each element to int16_t before streaming
+// so it is printed as a number rather than as a character.
+template<>
+string printVectorElements(const std::vector<int8_t>& c)
+{
+    stringstream out;
+    bool needSeparator = false;
+    for (const auto& value : c) {
+        if (needSeparator)
+            out << ",";
+        out << static_cast<int16_t>(value);
+        needSeparator = true;
+    }
+    return out.str();
+}
+
+} // namespace pbindexdump
+
+// Forwards the settings to the IFormatter base, which keeps a reference to
+// them; nothing else is initialized here.
+CppFormatter::CppFormatter(const Settings& settings)
+    : IFormatter{ settings }
+{
+}
+
+// Loads the PBI file named in the settings and writes, to stdout, C++
+// statements that rebuild the same data on a PbiRawData object.
+//
+// The basic section is always emitted; the barcode, mapped and reference
+// sections are emitted only when the loaded file reports having them.
+// Throws std::runtime_error when the file's PBI version is not one of the
+// versions handled below.
+void CppFormatter::Run(void)
+{
+    using namespace PacBio::BAM;
+
+    const PbiRawData rawData{ settings_.inputPbiFilename_ };
+    const PbiRawBarcodeData& barcodeData = rawData.BarcodeData();
+    const PbiRawBasicData& basicData = rawData.BasicData();
+    const PbiRawMappedData& mappedData = rawData.MappedData();
+    const PbiRawReferenceData& referenceData = rawData.ReferenceData();
+
+    // spell out the version enumerator as it must appear in generated code
+    string version;
+    switch (rawData.Version()) {
+        case PbiFile::Version_3_0_0 :
+            version = "PbiFile::Version_3_0_0";
+            break;
+        case PbiFile::Version_3_0_1 :
+            version = "PbiFile::Version_3_0_1";
+            break;
+        default:
+            throw runtime_error("unsupported PBI version encountered");
+    }
+
+    // OR-expression of the section flags present in the file
+    string fileSections = "PbiFile::BASIC";
+    if (rawData.HasBarcodeData())
+        fileSections += " | PbiFile::BARCODE";
+    if (rawData.HasMappedData())
+        fileSections += " | PbiFile::MAPPED";
+    if (rawData.HasReferenceData())
+        fileSections += " | PbiFile::REFERENCE";
+
+    stringstream s;
+
+    // writes one "<lhs> = {<values>};" line
+    const auto assign = [&s](const char* lhs, const string& values) {
+        s << lhs << " = {" << values << "};" << endl;
+    };
+
+    s << "PbiRawData rawData;" << endl;
+    s << "rawData.Version(" << version << ");" << endl;
+    s << "rawData.FileSections(" << fileSections << ");" << endl;
+    s << "rawData.NumReads(" << rawData.NumReads() << ");" << endl;
+    s << endl;
+
+    s << "PbiRawBasicData& basicData = rawData.BasicData();" << endl;
+    assign("basicData.rgId_", printVectorElements(basicData.rgId_));
+    assign("basicData.qStart_", printVectorElements(basicData.qStart_));
+    assign("basicData.qEnd_", printVectorElements(basicData.qEnd_));
+    assign("basicData.holeNumber_", printVectorElements(basicData.holeNumber_));
+    assign("basicData.readQual_", printVectorElements(basicData.readQual_));
+    assign("basicData.ctxtFlag_", printVectorElements(basicData.ctxtFlag_));
+    assign("basicData.fileOffset_", printVectorElements(basicData.fileOffset_));
+    s << endl;
+
+    if (rawData.HasBarcodeData()) {
+        s << "PbiRawBarcodeData& barcodeData = rawData.BarcodeData();" << endl;
+        assign("barcodeData.bcForward_", printVectorElements(barcodeData.bcForward_));
+        assign("barcodeData.bcReverse_", printVectorElements(barcodeData.bcReverse_));
+        assign("barcodeData.bcQual_", printVectorElements(barcodeData.bcQual_));
+        s << endl;
+    }
+
+    if (rawData.HasMappedData()) {
+        s << "PbiRawMappedData& mappedData = rawData.MappedData();" << endl;
+        assign("mappedData.tId_", printVectorElements(mappedData.tId_));
+        assign("mappedData.tStart_", printVectorElements(mappedData.tStart_));
+        assign("mappedData.tEnd_", printVectorElements(mappedData.tEnd_));
+        assign("mappedData.aStart_", printVectorElements(mappedData.aStart_));
+        assign("mappedData.aEnd_", printVectorElements(mappedData.aEnd_));
+        assign("mappedData.revStrand_", printVectorElements(mappedData.revStrand_));
+        assign("mappedData.nM_", printVectorElements(mappedData.nM_));
+        assign("mappedData.nMM_", printVectorElements(mappedData.nMM_));
+        assign("mappedData.mapQV_", printVectorElements(mappedData.mapQV_));
+        s << endl;
+    }
+
+    if (rawData.HasReferenceData()) {
+        s << "PbiRawReferenceData& referenceData = rawData.ReferenceData();" << endl;
+        s << "referenceData.entries_ = { " << endl;
+        s << printCppReferenceData(referenceData);
+        s << "};" << endl;
+        s << endl;
+    }
+
+    // everything is buffered above and written to stdout in one go
+    cout << s.str() << endl;
+}
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef CPPFORMATTER_H
+#define CPPFORMATTER_H
+
+#include "IFormatter.h"
+
+namespace pbindexdump {
+
+// Formatter that prints the contents of a PBI file as C++ source statements.
+class CppFormatter : public IFormatter
+{
+public:
+    // Stores (via IFormatter) a reference to the settings; the settings
+    // object must outlive this formatter.
+    CppFormatter(const Settings& settings);
+
+    // Implements IFormatter::Run. Marked 'override' so the compiler rejects
+    // any future signature drift from the base-class virtual.
+    void Run(void) override;
+};
+
+} // namespace pbindexdump
+
+#endif // CPPFORMATTER_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef IFORMATTER_H
+#define IFORMATTER_H
+
+#include "Settings.h"
+
+namespace pbindexdump {
+
+// Abstract base class of the pbindexdump output formatters.
+//
+// Holds a reference to the tool settings (not a copy), so the Settings
+// object passed to the constructor must outlive the formatter.
+class IFormatter
+{
+public:
+    // Virtual: formatters are owned and destroyed through IFormatter
+    // pointers (std::unique_ptr<IFormatter> in PbIndexDump::Run). Deleting a
+    // derived object through a base pointer with a non-virtual destructor
+    // is undefined behaviour and would skip the derived class's cleanup.
+    virtual ~IFormatter(void) { }
+
+public:
+    // Produces the formatter's output; implemented by each concrete formatter.
+    virtual void Run(void) =0;
+
+protected:
+    const Settings& settings_;
+
+protected:
+    // Only derived classes may construct the base.
+    IFormatter(const Settings& settings)
+        : settings_(settings)
+    { }
+};
+
+} // namespace pbindexdump
+
+#endif // IFORMATTER_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "JsonFormatter.h"
+#include "json.hpp"
+#include <pbbam/PbiFile.h>
+#include <iostream>
+#include <sstream>
+using namespace pbindexdump;
+using namespace PacBio::BAM;
+using namespace std;
+
+namespace pbindexdump {
+
+
+} // namespace pbindexdump
+
+// Keeps a reference to the settings (through IFormatter) and constructs the
+// raw index object from the PBI filename given in those settings.
+JsonFormatter::JsonFormatter(const Settings& settings)
+    : IFormatter{ settings }
+    , index_{ settings.inputPbiFilename_ }
+{
+}
+
+// Stores the file-level metadata in the JSON document: "version" (dotted
+// string), "fileSections" (names of the sections present; "BasicData" is
+// always listed) and "numReads".
+// Throws std::runtime_error for a PBI version not handled below.
+void JsonFormatter::FormatMetadata(void)
+{
+    string version;
+    const auto pbiVersion = index_.Version();
+    if (pbiVersion == PbiFile::Version_3_0_0)
+        version = "3.0.0";
+    else if (pbiVersion == PbiFile::Version_3_0_1)
+        version = "3.0.1";
+    else
+        throw runtime_error("unsupported PBI version encountered");
+
+    nlohmann::json fileSections;
+    fileSections.push_back("BasicData");
+    if (index_.HasBarcodeData())
+        fileSections.push_back("BarcodeData");
+    if (index_.HasMappedData())
+        fileSections.push_back("MappedData");
+    if (index_.HasReferenceData())
+        fileSections.push_back("ReferenceData");
+
+    json_["version"] = version;
+    json_["fileSections"] = fileSections;
+    json_["numReads"] = index_.NumReads();
+}
+
+// Stores the index column-wise: each per-read vector of the basic section
+// (and of the barcode / mapped sections, when present) becomes one JSON
+// value under "basicData", "barcodeData" or "mappedData".
+void JsonFormatter::FormatRaw(void)
+{
+    const PbiRawBasicData& basicData = index_.BasicData();
+    nlohmann::json& basicJson = json_["basicData"];
+    basicJson["rgId"] = basicData.rgId_;
+    basicJson["qStart"] = basicData.qStart_;
+    basicJson["qEnd"] = basicData.qEnd_;
+    basicJson["holeNumber"] = basicData.holeNumber_;
+    basicJson["readQual"] = basicData.readQual_;
+    basicJson["ctxtFlag"] = basicData.ctxtFlag_;
+    basicJson["fileOffset"] = basicData.fileOffset_;
+
+    if (index_.HasBarcodeData()) {
+        const PbiRawBarcodeData& barcodeData = index_.BarcodeData();
+        nlohmann::json& barcodeJson = json_["barcodeData"];
+        barcodeJson["bcForward"] = barcodeData.bcForward_;
+        barcodeJson["bcReverse"] = barcodeData.bcReverse_;
+        barcodeJson["bcQuality"] = barcodeData.bcQual_;
+    }
+
+    if (index_.HasMappedData()) {
+        const PbiRawMappedData& mappedData = index_.MappedData();
+        nlohmann::json& mappedJson = json_["mappedData"];
+
+        // NOTE(review): unlike FormatRecords, no int32_t cast is applied
+        // to tId/tStart/tEnd here - the vectors are stored with their
+        // native element type; confirm this difference is intended
+        mappedJson["tId"] = mappedData.tId_;
+        mappedJson["tStart"] = mappedData.tStart_;
+        mappedJson["tEnd"] = mappedData.tEnd_;
+
+        mappedJson["aStart"] = mappedData.aStart_;
+        mappedJson["aEnd"] = mappedData.aEnd_;
+        mappedJson["revStrand"] = mappedData.revStrand_;
+        mappedJson["nM"] = mappedData.nM_;
+        mappedJson["nMM"] = mappedData.nMM_;
+        mappedJson["mapQV"] = mappedData.mapQV_;
+    }
+}
+
+// Stores the index record-wise: builds one JSON object per read (basic
+// fields, plus barcode and mapping fields when the file has them) and
+// places the resulting list under "reads".
+void JsonFormatter::FormatRecords(void)
+{
+    nlohmann::json reads;
+    const uint32_t numReads = index_.NumReads();
+    const bool hasBarcodeData = index_.HasBarcodeData();
+    const bool hasMappedData = index_.HasMappedData();
+
+    // loop-invariant: the basic section is needed for every read
+    const PbiRawBasicData& basicData = index_.BasicData();
+
+    for (uint32_t i = 0; i < numReads; ++i) {
+
+        nlohmann::json read;
+
+        // common data
+        read["rgId"] = basicData.rgId_[i];
+        read["qStart"] = basicData.qStart_[i];
+        read["qEnd"] = basicData.qEnd_[i];
+        read["holeNumber"] = basicData.holeNumber_[i];
+        read["readQuality"] = basicData.readQual_[i];
+        read["contextFlag"] = basicData.ctxtFlag_[i];
+        read["fileOffset"] = basicData.fileOffset_[i];
+
+        // barcode data, if present
+        if (hasBarcodeData) {
+            const PbiRawBarcodeData& barcodeData = index_.BarcodeData();
+            read["bcForward"] = barcodeData.bcForward_[i];
+            read["bcReverse"] = barcodeData.bcReverse_[i];
+            read["bcQuality"] = barcodeData.bcQual_[i];
+        }
+
+        // mapping data, if present
+        if (hasMappedData) {
+            const PbiRawMappedData& mappedData = index_.MappedData();
+
+            // casts to force -1 if unmapped
+            read["tId"] = static_cast<int32_t>(mappedData.tId_[i]);
+            read["tStart"] = static_cast<int32_t>(mappedData.tStart_[i]);
+            read["tEnd"] = static_cast<int32_t>(mappedData.tEnd_[i]);
+
+            read["aStart"] = mappedData.aStart_[i];
+            read["aEnd"] = mappedData.aEnd_[i];
+            read["nM"] = mappedData.nM_[i];
+            read["nMM"] = mappedData.nMM_[i];
+            read["mapQuality"] = mappedData.mapQV_[i];
+            read["reverseStrand"] = mappedData.revStrand_[i];
+        }
+
+        reads.push_back(std::move(read));
+    }
+
+    // 'reads' is not used again: move it into the document instead of
+    // deep-copying the complete per-read list
+    json_["reads"] = std::move(reads);
+}
+
+// When the index has a reference section, stores one JSON object per
+// reference entry (tId, beginRow, endRow - each cast to int32_t) under
+// "references". Does nothing when the section is absent.
+void JsonFormatter::FormatReferences(void)
+{
+    if (index_.HasReferenceData()) {
+        const PbiRawReferenceData& referenceData = index_.ReferenceData();
+        nlohmann::json references;
+        for (const PbiReferenceEntry& entry : referenceData.entries_) {
+            nlohmann::json element;
+            element["tId"] = static_cast<int32_t>(entry.tId_);
+            element["beginRow"] = static_cast<int32_t>(entry.beginRow_);
+            element["endRow"] = static_cast<int32_t>(entry.endRow_);
+            references.push_back(std::move(element));
+        }
+        // 'references' is not used again: move it into the document
+        // instead of deep-copying the whole list
+        json_["references"] = std::move(references);
+    }
+}
+
+// Serializes the accumulated JSON document, using the indent level from the
+// settings, and writes it to stdout followed by a newline (flushed).
+void JsonFormatter::Print(void)
+{
+    const auto text = json_.dump(settings_.jsonIndentLevel_);
+    cout << text << endl;
+}
+
+// Builds the JSON document and prints it: metadata and references first,
+// then the read data in either the raw (column-wise) or the per-record
+// layout, chosen by settings_.jsonRaw_.
+void JsonFormatter::Run(void)
+{
+    FormatMetadata();
+    FormatReferences();
+
+    if (!settings_.jsonRaw_) {
+        FormatRecords();
+    } else {
+        FormatRaw();
+    }
+
+    Print();
+}
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef JSONFORMATTER_H
+#define JSONFORMATTER_H
+
+#include "IFormatter.h"
+#include "json.hpp"
+#include <pbbam/PbiRawData.h>
+
+namespace pbindexdump {
+
+// Formatter that prints the contents of a PBI file as a JSON document.
+class JsonFormatter : public IFormatter
+{
+public:
+    // Stores (via IFormatter) a reference to the settings and constructs
+    // the raw index from settings.inputPbiFilename_.
+    JsonFormatter(const Settings& settings);
+
+    // Implements IFormatter::Run. Marked 'override' so the compiler rejects
+    // any future signature drift from the base-class virtual.
+    void Run(void) override;
+
+private:
+    // steps that always run
+    void FormatMetadata(void);
+    void FormatReferences(void);
+
+    // alternative layouts for the read data; Run picks one of the two
+    void FormatRaw(void);
+    void FormatRecords(void);
+
+    // writes the finished document to stdout
+    void Print(void);
+
+private:
+    PacBio::BAM::PbiRawData index_;   // raw index data being dumped
+    nlohmann::json json_;             // document accumulated by the Format* steps
+};
+
+} // namespace pbindexdump
+
+#endif // JSONFORMATTER_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "PbIndexDump.h"
+#include "CppFormatter.h"
+#include "JsonFormatter.h"
+#include <cassert>
+using namespace pbindexdump;
+using namespace std;
+
+// Creates the formatter selected by settings.format_ ("json" or "cpp") and
+// runs it. Throws std::runtime_error, naming the offending value, for any
+// other format.
+void PbIndexDump::Run(const Settings& settings)
+{
+    const auto& format = settings.format_;
+
+    std::unique_ptr<IFormatter> formatter;
+    if (format == "json")
+        formatter.reset(new JsonFormatter(settings));
+    else if (format == "cpp")
+        formatter.reset(new CppFormatter(settings));
+    else
+        throw runtime_error(string{ "unsupported output format requested: " } + format);
+
+    // every non-throwing path above has created a formatter
+    assert(formatter);
+    formatter->Run();
+}
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef PBINDEXDUMP_H
+#define PBINDEXDUMP_H
+
+namespace pbindexdump {
+
+class Settings;
+
+// Entry point of the pbindexdump tool; exposes a single static function.
+class PbIndexDump
+{
+public:
+    // Dumps the PBI file described by 'settings' in the requested output
+    // format; throws std::runtime_error for an unsupported format.
+    static void Run(const Settings& settings);
+};
+
+} // namespace pbindexdump
+
+#endif // PBINDEXDUMP_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef PBINDEXDUMPVERSION_H
+#define PBINDEXDUMPVERSION_H
+
+#include <string>
+
+namespace pbindexdump {
+
+/// Version string of the tool.
+/// NOTE(review): the literal looks like a configure-time placeholder that the
+/// build system replaces when this header is generated - confirm.
+const std::string Version("@PbIndexDump_VERSION@");
+
+} // namespace pbindexdump
+
+#endif // PBINDEXDUMPVERSION_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef SETTINGS_H
+#define SETTINGS_H
+
+#include <string>
+#include <vector>
+
+namespace pbindexdump {
+
+/// Plain bundle of the options used by the tool. All members are public and
+/// start out with the defaults given at their declarations.
+class Settings
+{
+public:
+    /// Creates settings holding the defaults declared below.
+    /// Deliberately user-provided (not `= default`) so that the class stays
+    /// a non-aggregate.
+    Settings(void) { }
+
+public:
+    /// name of the input file; empty until set (presumably a PBI file)
+    std::string inputPbiFilename_;
+    /// name of the output format; defaults to "json"
+    std::string format_ = "json";
+    /// indentation used for JSON output; defaults to 4
+    int jsonIndentLevel_ = 4;
+    /// JSON "raw" switch; defaults to false
+    bool jsonRaw_ = false;
+    /// collected error messages; empty by default
+    std::vector<std::string> errors_;
+};
+
+} // namespace pbindexdump
+
+#endif // SETTINGS_H
--- /dev/null
+/*!
+@mainpage
+
+These pages contain the API documentation of JSON for Modern C++, a C++11
+header-only JSON class.
+
+Class @ref nlohmann::basic_json is a good entry point for the documentation.
+
+@copyright The code is licensed under the [MIT
+ License](http://opensource.org/licenses/MIT):
+ <br>
+ Copyright © 2013-2015 Niels Lohmann.
+ <br>
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation files
+ (the "Software"), to deal in the Software without restriction,
+ including without limitation the rights to use, copy, modify, merge,
+ publish, distribute, sublicense, and/or sell copies of the Software,
+ and to permit persons to whom the Software is furnished to do so,
+ subject to the following conditions:
+ <br>
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+ <br>
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+@author [Niels Lohmann](http://nlohmann.me)
+@see https://github.com/nlohmann/json to download the source code
+*/
+
+#ifndef NLOHMANN_JSON_HPP
+#define NLOHMANN_JSON_HPP
+
+#include <algorithm>
+#include <array>
+#include <ciso646>
+#include <cmath>
+#include <cstdio>
+#include <functional>
+#include <initializer_list>
+#include <iomanip>
+#include <iostream>
+#include <iterator>
+#include <limits>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+// enable ssize_t on MinGW
+#ifdef __GNUC__
+ #ifdef __MINGW32__
+ #include <sys/types.h>
+ #endif
+#endif
+
+// enable ssize_t for MSVC
+#ifdef _MSC_VER
+ #include <basetsd.h>
+ using ssize_t = SSIZE_T;
+#endif
+
+/*!
+@brief namespace for Niels Lohmann
+@see https://github.com/nlohmann
+*/
+namespace nlohmann
+{
+
+
+/*!
+@brief unnamed namespace with internal helper functions
+*/
+namespace
+{
+/*!
+@brief Helper to determine whether a type T declares a nested `mapped_type`.
+
+`value` is nonzero if `T::mapped_type` names a type and zero otherwise. The
+check works through overload resolution: the first `probe` overload is only
+viable when `U::mapped_type*` is a valid type, and it is preferred over the
+variadic fallback; the two overloads are told apart by the size of their
+return types.
+
+@sa http://stackoverflow.com/a/7728728/266378
+*/
+template<typename T>
+struct has_mapped_type
+{
+  private:
+    template<typename U> static char probe(typename U::mapped_type*);
+    template<typename U> static int probe(...);
+  public:
+    enum { value = sizeof(probe<T>(nullptr)) == sizeof(char) };
+};
+
+/// "equality" comparison for floating point numbers: two values count as
+/// equal when neither one compares greater than the other (so a value for
+/// which both ordered comparisons are false, such as NaN, compares "equal")
+template<typename T>
+static bool approx(const T a, const T b)
+{
+    if (a > b)
+    {
+        return false;
+    }
+    return not (a < b);
+}
+}
+
+/*!
+@brief a class to store JSON values
+
+@tparam ObjectType type for JSON objects (@c std::map by default; will be used
+in @ref object_t)
+@tparam ArrayType type for JSON arrays (@c std::vector by default; will be used
+in @ref array_t)
+@tparam StringType type for JSON strings and object keys (@c std::string by
+default; will be used in @ref string_t)
+@tparam BooleanType type for JSON booleans (@c `bool` by default; will be used
+in @ref boolean_t)
+@tparam NumberIntegerType type for JSON integer numbers (@c `int64_t` by
+default; will be used in @ref number_integer_t)
+@tparam NumberFloatType type for JSON floating-point numbers (@c `double` by
+default; will be used in @ref number_float_t)
+@tparam AllocatorType type of the allocator to use (@c `std::allocator` by
+default)
+
+@requirement The class satisfies the following concept requirements:
+- Basic
+ - [DefaultConstructible](http://en.cppreference.com/w/cpp/concept/DefaultConstructible):
+ JSON values can be default constructed. The result will be a JSON null value.
+ - [MoveConstructible](http://en.cppreference.com/w/cpp/concept/MoveConstructible):
+ A JSON value can be constructed from an rvalue argument.
+ - [CopyConstructible](http://en.cppreference.com/w/cpp/concept/CopyConstructible):
+ A JSON value can be copy-constructed from an lvalue expression.
+ - [MoveAssignable](http://en.cppreference.com/w/cpp/concept/MoveAssignable):
+ A JSON value can be assigned from an rvalue argument.
+ - [CopyAssignable](http://en.cppreference.com/w/cpp/concept/CopyAssignable):
+ A JSON value can be copy-assigned from an lvalue expression.
+ - [Destructible](http://en.cppreference.com/w/cpp/concept/Destructible):
+ JSON values can be destructed.
+- Layout
+ - [StandardLayoutType](http://en.cppreference.com/w/cpp/concept/StandardLayoutType):
+ JSON values have
+ [standard layout](http://en.cppreference.com/w/cpp/language/data_members#Standard_layout):
+ All non-static data members are private and standard layout types, the class
+ has no virtual functions or (virtual) base classes.
+- Library-wide
+ - [EqualityComparable](http://en.cppreference.com/w/cpp/concept/EqualityComparable):
+ JSON values can be compared with `==`, see @ref
+ operator==(const_reference,const_reference).
+ - [LessThanComparable](http://en.cppreference.com/w/cpp/concept/LessThanComparable):
+ JSON values can be compared with `<`, see @ref
+ operator<(const_reference,const_reference).
+ - [Swappable](http://en.cppreference.com/w/cpp/concept/Swappable):
+ Any JSON lvalue or rvalue can be swapped with any lvalue or rvalue of
+ other compatible types, using unqualified function call @ref swap().
+ - [NullablePointer](http://en.cppreference.com/w/cpp/concept/NullablePointer):
+ JSON values can be compared against `std::nullptr_t` objects which are used
+ to model the `null` value.
+- Container
+ - [Container](http://en.cppreference.com/w/cpp/concept/Container):
+ JSON values can be used like STL containers and provide iterator access.
+ - [ReversibleContainer](http://en.cppreference.com/w/cpp/concept/ReversibleContainer):
+ JSON values can be used like STL containers and provide reverse iterator
+ access.
+
+@internal
+@note ObjectType trick from http://stackoverflow.com/a/9860911
+@endinternal
+
+@see RFC 7159 <http://rfc7159.net/rfc7159>
+*/
+template <
+ template<typename U, typename V, typename... Args> class ObjectType = std::map,
+ template<typename U, typename... Args> class ArrayType = std::vector,
+ class StringType = std::string,
+ class BooleanType = bool,
+ class NumberIntegerType = int64_t,
+ class NumberFloatType = double,
+ template<typename U> class AllocatorType = std::allocator
+ >
+class basic_json
+{
+ private:
+ /// workaround type for MSVC
+ using basic_json_t = basic_json<ObjectType,
+ ArrayType,
+ StringType,
+ BooleanType,
+ NumberIntegerType,
+ NumberFloatType,
+ AllocatorType>;
+
+ public:
+
+ /////////////////////
+ // container types //
+ /////////////////////
+
+ /// @name container types
+ /// @{
+
+ /// the type of elements in a basic_json container
+ using value_type = basic_json;
+
+ /// the type of an element reference
+ using reference = value_type&;
+
+ /// the type of an element const reference
+ using const_reference = const value_type&;
+
+ /// a type to represent differences between iterators
+ using difference_type = std::ptrdiff_t;
+
+ /// a type to represent container sizes
+ using size_type = std::size_t;
+
+ /// the allocator type
+ using allocator_type = AllocatorType<basic_json>;
+
+ /// the type of an element pointer
+ using pointer = typename std::allocator_traits<allocator_type>::pointer;
+ /// the type of an element const pointer
+ using const_pointer = typename std::allocator_traits<allocator_type>::const_pointer;
+
+ // forward declaration
+ template<typename Base> class json_reverse_iterator;
+
+ /// an iterator for a basic_json container
+ class iterator;
+ /// a const iterator for a basic_json container
+ class const_iterator;
+ /// a reverse iterator for a basic_json container
+ using reverse_iterator = json_reverse_iterator<typename basic_json::iterator>;
+ /// a const reverse iterator for a basic_json container
+ using const_reverse_iterator = json_reverse_iterator<typename basic_json::const_iterator>;
+
+ /// @}
+
+
+ /*!
+ @brief returns the allocator associated with the container
+
+ The function is static and reads no object state: every call creates and
+ returns a new, value-initialized @ref allocator_type.
+
+ @return a value-initialized allocator of type @ref allocator_type
+ */
+ static allocator_type get_allocator()
+ {
+ return allocator_type();
+ }
+
+
+ ///////////////////////////
+ // JSON value data types //
+ ///////////////////////////
+
+ /// @name JSON value data types
+ /// @{
+
+ /*!
+ @brief a type for an object
+
+ [RFC 7159](http://rfc7159.net/rfc7159) describes JSON objects as follows:
+ > An object is an unordered collection of zero or more name/value pairs,
+ > where a name is a string and a value is a string, number, boolean, null,
+ > object, or array.
+
+ To store objects in C++, a type is defined by the template parameters @a
+ ObjectType which chooses the container (e.g., `std::map` or
+ `std::unordered_map`), @a StringType which chooses the type of the keys or
+ names, and @a AllocatorType which chooses the allocator to use.
+
+ #### Default type
+
+ With the default values for @a ObjectType (`std::map`), @a StringType
+ (`std::string`), and @a AllocatorType (`std::allocator`), the default value
+ for @a object_t is:
+
+ @code {.cpp}
+ std::map<
+ std::string, // key_type
+ basic_json, // value_type
+ std::less<std::string>, // key_compare
+ std::allocator<std::pair<const std::string, basic_json>> // allocator_type
+ >
+ @endcode
+
+ #### Behavior
+
+ The choice of @a object_t influences the behavior of the JSON class. With
+ the default type, objects have the following behavior:
+
+ - When all names are unique, objects will be interoperable in the sense
+ that all software implementations receiving that object will agree on the
+ name-value mappings.
+ - When the names within an object are not unique, later stored name/value
+ pairs overwrite previously stored name/value pairs, leaving the used
+ names unique. For instance, `{"key": 1}` and `{"key": 2, "key": 1}` will
+ be treated as equal and both stored as `{"key": 1}`.
+ - Internally, name/value pairs are stored in lexicographical order of the
+ names. Objects will also be serialized (see @ref dump) in this order. For
+ instance, `{"b": 1, "a": 2}` and `{"a": 2, "b": 1}` will be stored and
+ serialized as `{"a": 2, "b": 1}`.
+ - When comparing objects, the order of the name/value pairs is irrelevant.
+ This makes objects interoperable in the sense that they will not be
+ affected by these differences. For instance, `{"b": 1, "a": 2}` and
+ `{"a": 2, "b": 1}` will be treated as equal.
+
+ #### Limits
+
+ [RFC 7159](http://rfc7159.net/rfc7159) specifies:
+ > An implementation may set limits on the maximum depth of nesting.
+
+ In this class, the object's limit of nesting is not constrained explicitly.
+ However, a maximum depth of nesting may be introduced by the compiler or
+ runtime environment. A theoretical limit can be queried by calling the @ref
+ max_size function of a JSON object.
+
+ #### Storage
+
+ Objects are stored as pointers in a `basic_json` type. That is, for any
+ access to object values, a pointer of type `object_t*` must be dereferenced.
+
+ @sa array_t
+ */
+ using object_t = ObjectType<StringType,
+ basic_json,
+ std::less<StringType>,
+ AllocatorType<std::pair<const StringType,
+ basic_json>>>;
+
+ /*!
+ @brief a type for an array
+
+ [RFC 7159](http://rfc7159.net/rfc7159) describes JSON arrays as follows:
+ > An array is an ordered sequence of zero or more values.
+
+ To store arrays in C++, a type is defined by the template parameters @a
+ ArrayType which chooses the container (e.g., `std::vector` or `std::list`)
+ and @a AllocatorType which chooses the allocator to use.
+
+ #### Default type
+
+ With the default values for @a ArrayType (`std::vector`) and @a
+ AllocatorType (`std::allocator`), the default value for @a array_t is:
+
+ @code {.cpp}
+ std::vector<
+ basic_json, // value_type
+ std::allocator<basic_json> // allocator_type
+ >
+ @endcode
+
+ #### Limits
+
+ [RFC 7159](http://rfc7159.net/rfc7159) specifies:
+ > An implementation may set limits on the maximum depth of nesting.
+
+ In this class, the array's limit of nesting is not constrained explicitly.
+ However, a maximum depth of nesting may be introduced by the compiler or
+ runtime environment. A theoretical limit can be queried by calling the @ref
+ max_size function of a JSON array.
+
+ #### Storage
+
+ Arrays are stored as pointers in a `basic_json` type. That is, for any
+ access to array values, a pointer of type `array_t*` must be dereferenced.
+ */
+ using array_t = ArrayType<basic_json, AllocatorType<basic_json>>;
+
+ /*!
+ @brief a type for a string
+
+ [RFC 7159](http://rfc7159.net/rfc7159) describes JSON strings as follows:
+ > A string is a sequence of zero or more Unicode characters.
+
+ To store strings in C++, a type is defined by the template parameter @a
+ StringType which chooses the container (e.g., `std::string`) to use.
+
+ Unicode values are split by the JSON class into byte-sized characters
+ during deserialization.
+
+ #### Default type
+
+ With the default values for @a StringType (`std::string`), the default
+ value for @a string_t is:
+
+ @code {.cpp}
+ std::string
+ @endcode
+
+ #### String comparison
+
+ [RFC 7159](http://rfc7159.net/rfc7159) states:
+ > Software implementations are typically required to test names of object
+ > members for equality. Implementations that transform the textual
+ > representation into sequences of Unicode code units and then perform the
+ > comparison numerically, code unit by code unit, are interoperable in the
+ > sense that implementations will agree in all cases on equality or
+ > inequality of two strings. For example, implementations that compare
+ > strings with escaped characters unconverted may incorrectly find that
+ > `"a\\b"` and `"a\u005Cb"` are not equal.
+
+ This implementation is interoperable as it does compare strings code unit
+ by code unit.
+
+ #### Storage
+
+ String values are stored as pointers in a `basic_json` type. That is, for
+ any access to string values, a pointer of type `string_t*` must be
+ dereferenced.
+ */
+ using string_t = StringType;
+
+ /*!
+ @brief a type for a boolean
+
+ [RFC 7159](http://rfc7159.net/rfc7159) implicitly describes a boolean as a
+ type which differentiates the two literals `true` and `false`.
+
+ To store booleans in C++, a type is defined by the template parameter @a
+ BooleanType which chooses the type to use.
+
+ #### Default type
+
+ With the default values for @a BooleanType (`bool`), the default value for
+ @a boolean_t is:
+
+ @code {.cpp}
+ bool
+ @endcode
+
+ #### Storage
+
+ Boolean values are stored directly inside a `basic_json` type.
+ */
+ using boolean_t = BooleanType;
+
+ /*!
+ @brief a type for a number (integer)
+
+ [RFC 7159](http://rfc7159.net/rfc7159) describes numbers as follows:
+ > The representation of numbers is similar to that used in most programming
+ > languages. A number is represented in base 10 using decimal digits. It
+ > contains an integer component that may be prefixed with an optional minus
+ > sign, which may be followed by a fraction part and/or an exponent part.
+ > Leading zeros are not allowed. (...) Numeric values that cannot be
+ > represented in the grammar below (such as Infinity and NaN) are not
+ > permitted.
+
+ This description includes both integer and floating-point numbers. However,
+ C++ allows more precise storage if it is known whether the number is an
+ integer or a floating-point number. Therefore, two different types, @ref
+ number_integer_t and @ref number_float_t are used.
+
+ To store integer numbers in C++, a type is defined by the template
+ parameter @a NumberIntegerType which chooses the type to use.
+
+ #### Default type
+
+ With the default values for @a NumberIntegerType (`int64_t`), the default
+ value for @a number_integer_t is:
+
+ @code {.cpp}
+ int64_t
+ @endcode
+
+ #### Default behavior
+
+ - The restriction on leading zeros is not enforced in C++. Instead,
+ leading zeros in integer literals lead to an interpretation as octal
+ number. Internally, the value will be stored as decimal number. For
+ instance, the C++ integer literal `010` will be serialized to `8`. During
+ deserialization, leading zeros yield an error.
+ - Not-a-number (NaN) values will be serialized to `null`.
+
+ #### Limits
+
+ [RFC 7159](http://rfc7159.net/rfc7159) specifies:
+ > An implementation may set limits on the range and precision of numbers.
+
+ When the default type is used, the maximal integer number that can be
+ stored is `9223372036854775807` (INT64_MAX) and the minimal integer number
+ that can be stored is `-9223372036854775808` (INT64_MIN). Integer numbers
+ that are out of range will yield over/underflow when used in a constructor.
+ During deserialization, integer numbers that are too large or too small
+ will automatically be stored as @ref number_float_t.
+
+ [RFC 7159](http://rfc7159.net/rfc7159) further states:
+ > Note that when such software is used, numbers that are integers and are
+ > in the range \f$[-2^{53}+1, 2^{53}-1]\f$ are interoperable in the sense
+ > that implementations will agree exactly on their numeric values.
+
+ As this range is a subrange of the exactly supported range [INT64_MIN,
+ INT64_MAX], this class's integer type is interoperable.
+
+ #### Storage
+
+ Integer number values are stored directly inside a `basic_json` type.
+ */
+ using number_integer_t = NumberIntegerType;
+
+ /*!
+ @brief a type for a number (floating-point)
+
+ [RFC 7159](http://rfc7159.net/rfc7159) describes numbers as follows:
+ > The representation of numbers is similar to that used in most programming
+ > languages. A number is represented in base 10 using decimal digits. It
+ > contains an integer component that may be prefixed with an optional minus
+ > sign, which may be followed by a fraction part and/or an exponent part.
+ > Leading zeros are not allowed. (...) Numeric values that cannot be
+ > represented in the grammar below (such as Infinity and NaN) are not
+ > permitted.
+
+ This description includes both integer and floating-point numbers. However,
+ C++ allows more precise storage if it is known whether the number is an
+ integer or a floating-point number. Therefore, two different types, @ref
+ number_integer_t and @ref number_float_t are used.
+
+ To store floating-point numbers in C++, a type is defined by the template
+ parameter @a NumberFloatType which chooses the type to use.
+
+ #### Default type
+
+ With the default values for @a NumberFloatType (`double`), the default
+ value for @a number_float_t is:
+
+ @code {.cpp}
+ double
+ @endcode
+
+ #### Default behavior
+
+ - The restriction on leading zeros is not enforced in C++. Instead,
+ leading zeros in floating-point literals will be ignored. Internally, the
+ value will be stored as decimal number. For instance, the C++
+ floating-point literal `01.2` will be serialized to `1.2`. During
+ deserialization, leading zeros yield an error.
+ - Not-a-number (NaN) values will be serialized to `null`.
+
+ #### Limits
+
+ [RFC 7159](http://rfc7159.net/rfc7159) states:
+ > This specification allows implementations to set limits on the range and
+ > precision of numbers accepted. Since software that implements IEEE
+ > 754-2008 binary64 (double precision) numbers is generally available and
+ > widely used, good interoperability can be achieved by implementations that
+ > expect no more precision or range than these provide, in the sense that
+ > implementations will approximate JSON numbers within the expected
+ > precision.
+
+ This implementation does exactly follow this approach, as it uses double
+ precision floating-point numbers. Note values smaller than
+ `-1.79769313486232e+308` and values greater than `1.79769313486232e+308`
+ will be stored as NaN internally and be serialized to `null`.
+
+ #### Storage
+
+ Floating-point number values are stored directly inside a `basic_json` type.
+ */
+ using number_float_t = NumberFloatType;
+
+ /// @}
+
+
+ ///////////////////////////
+ // JSON type enumeration //
+ ///////////////////////////
+
+ /*!
+ @brief the JSON type enumeration
+
+ This enumeration collects the different JSON types. It is internally used
+ to distinguish the stored values, and the functions is_null, is_object,
+ is_array, is_string, is_boolean, is_number, and is_discarded rely on it.
+
+ @note The underlying type is `uint8_t`, so a value of this enumeration
+ occupies a single byte.
+ */
+ enum class value_t : uint8_t
+ {
+ null, ///< null value
+ object, ///< object (unordered set of name/value pairs)
+ array, ///< array (ordered collection of values)
+ string, ///< string value
+ boolean, ///< boolean value
+ number_integer, ///< number value (integer)
+ number_float, ///< number value (floating-point)
+ discarded ///< discarded by the parser callback function
+ };
+
+
+ private:
+ /*!
+ @brief helper for exception-safe object creation
+
+ Allocates storage for one `T` with a default-constructed `AllocatorType<T>`
+ and constructs the object in place from @a args. While construction is in
+ progress the storage is owned by a `std::unique_ptr`, so it is returned to
+ the allocator if the constructor throws.
+
+ All allocator operations go through `std::allocator_traits`, because
+ `construct` is an optional allocator member: `std::allocator<T>::construct`
+ is deprecated in C++17 and removed in C++20, and a user-supplied
+ `AllocatorType` need not provide it at all.
+
+ @tparam T type of the object to create
+ @tparam Args types of the constructor arguments
+
+ @param[in] args arguments forwarded to the constructor of `T`
+
+ @return pointer to the newly created object; ownership passes to the caller
+
+ @throw std::bad_alloc if the allocation fails (with the default allocator);
+ any exception thrown by the constructor of `T` is propagated
+ */
+ template<typename T, typename... Args>
+ static T* create(Args&& ... args)
+ {
+     using alloc_traits = std::allocator_traits<AllocatorType<T>>;
+
+     AllocatorType<T> alloc;
+     // releases the raw storage only; used while no object has been constructed
+     auto deleter = [&](T* storage)
+     {
+         alloc_traits::deallocate(alloc, storage, 1);
+     };
+     std::unique_ptr<T, decltype(deleter)> object(alloc_traits::allocate(alloc, 1), deleter);
+     alloc_traits::construct(alloc, object.get(), std::forward<Args>(args)...);
+     // construction succeeded: hand the object over without deallocating
+     return object.release();
+ }
+
+ ////////////////////////
+ // JSON value storage //
+ ////////////////////////
+
+ /// storage for the payload of a JSON value
+ union json_value
+ {
+     /// object payload (held through a pointer to keep the union small)
+     object_t* object;
+     /// array payload (held through a pointer to keep the union small)
+     array_t* array;
+     /// string payload (held through a pointer to keep the union small)
+     string_t* string;
+     /// boolean payload
+     boolean_t boolean;
+     /// integer number payload
+     number_integer_t number_integer;
+     /// floating-point number payload
+     number_float_t number_float;
+
+     /// default constructor (for null values)
+     json_value() noexcept = default;
+
+     /// store a boolean
+     json_value(boolean_t v) noexcept : boolean(v) {}
+
+     /// store an integer number
+     json_value(number_integer_t v) noexcept : number_integer(v) {}
+
+     /// store a floating-point number
+     json_value(number_float_t v) noexcept : number_float(v) {}
+
+     /// create the empty value that belongs to type @a t; nothing is stored
+     /// for null and discarded values
+     json_value(value_t t)
+     {
+         switch (t)
+         {
+             case value_t::object:
+                 object = create<object_t>();
+                 break;
+
+             case value_t::array:
+                 array = create<array_t>();
+                 break;
+
+             case value_t::string:
+                 string = create<string_t>("");
+                 break;
+
+             case value_t::boolean:
+                 boolean = boolean_t(false);
+                 break;
+
+             case value_t::number_integer:
+                 number_integer = number_integer_t(0);
+                 break;
+
+             case value_t::number_float:
+                 number_float = number_float_t(0.0);
+                 break;
+
+             case value_t::null:
+             case value_t::discarded:
+                 break;
+         }
+     }
+
+     /// store a newly allocated copy of a string
+     json_value(const string_t& value)
+         : string(create<string_t>(value))
+     {}
+
+     /// store a newly allocated copy of an object
+     json_value(const object_t& value)
+         : object(create<object_t>(value))
+     {}
+
+     /// store a newly allocated copy of an array
+     json_value(const array_t& value)
+         : array(create<array_t>(value))
+     {}
+ };
+
+
+ public:
+ //////////////////////////
+ // JSON parser callback //
+ //////////////////////////
+
+ /*!
+ @brief JSON callback events
+
+ This enumeration lists the parser events that can trigger calling a
+ callback function of type @ref parser_callback_t during parsing.
+
+ @note The underlying type is `uint8_t`, so a value of this enumeration
+ occupies a single byte.
+ */
+ enum class parse_event_t : uint8_t
+ {
+ /// the parser read `{` and started to process a JSON object
+ object_start,
+ /// the parser read `}` and finished processing a JSON object
+ object_end,
+ /// the parser read `[` and started to process a JSON array
+ array_start,
+ /// the parser read `]` and finished processing a JSON array
+ array_end,
+ /// the parser read a key of a value in an object
+ key,
+ /// the parser finished reading a JSON value
+ value
+ };
+
+ /*!
+ @brief per-element parser callback type
+
+ With a parser callback function, the result of parsing a JSON text can be
+ influenced. When passed to @ref parse(std::istream&, parser_callback_t) or
+ @ref parse(const string_t&, parser_callback_t), it is called on certain
+ events (passed as @ref parse_event_t via parameter @a event) with a set
+ recursion depth @a depth and context JSON value @a parsed. The return value
+ of the callback function is a boolean indicating whether the element that
+ emitted the callback shall be kept or not.
+
+ We distinguish six scenarios (determined by the event type) in which the
+ callback function can be called. The following table describes the values
+ of the parameters @a depth, @a event, and @a parsed.
+
+ parameter @a event | description | parameter @a depth | parameter @a parsed
+ ------------------ | ----------- | ------------------ | -------------------
+ parse_event_t::object_start | the parser read `{` and started to process a JSON object | depth of the parent of the JSON object | a JSON value with type discarded
+ parse_event_t::key | the parser read a key of a value in an object | depth of the currently parsed JSON object | a JSON string containing the key
+ parse_event_t::object_end | the parser read `}` and finished processing a JSON object | depth of the parent of the JSON object | the parsed JSON object
+ parse_event_t::array_start | the parser read `[` and started to process a JSON array | depth of the parent of the JSON array | a JSON value with type discarded
+ parse_event_t::array_end | the parser read `]` and finished processing a JSON array | depth of the parent of the JSON array | the parsed JSON array
+ parse_event_t::value | the parser finished reading a JSON value | depth of the value | the parsed JSON value
+
+ Discarding a value (i.e., returning `false`) has different effects depending on the
+ context in which the function was called:
+
+ - Discarded values in structured types are skipped. That is, the parser
+ will behave as if the discarded value was never read.
+ - In case a value outside a structured type is skipped, it is replaced with
+ `null`. This case happens if the top-level element is skipped.
+
+ @param[in] depth the depth of the recursion during parsing
+
+ @param[in] event an event of type parse_event_t indicating the context in
+ which the callback function has been called
+
+ @param[in,out] parsed the current intermediate parse result; note that
+ writing to this value has no effect for parse_event_t::key events
+
+ @return Whether the JSON value which called the function during parsing
+ should be kept (`true`) or not (`false`). In the latter case, it is either
+ skipped completely or replaced by an empty discarded object.
+
+ @sa @ref parse(std::istream&, parser_callback_t) or
+ @ref parse(const string_t&, parser_callback_t) for examples
+ */
+ using parser_callback_t = std::function<bool(
+ int depth, parse_event_t event, basic_json& parsed)>;
+
+
+ //////////////////
+ // constructors //
+ //////////////////
+
+ /*!
+ @brief create an empty value with a given type
+
+ Create an empty JSON value with a given type. The value will be default
+ initialized with an empty value which depends on the type:
+
+ Value type | initial value
+ ----------- | -------------
+ null | `null`
+ boolean | `false`
+ string | `""`
+ number | `0`
+ object | `{}`
+ array | `[]`
+
+ @param[in] value the type of the value to create
+
+ @complexity Constant.
+
+ @throw std::bad_alloc if allocation for object, array, or string value
+ fails
+
+ @liveexample{The following code shows the constructor for different @ref
+ value_t values,basic_json__value_t}
+ */
+ // m_value(value) selects json_value(value_t), which allocates an empty
+ // object/array/string for those types and stores false/0/0.0 for the
+ // boolean and number types; nothing is stored for null and discarded.
+ basic_json(const value_t value)
+ : m_type(value), m_value(value)
+ {}
+
+ /*!
+ @brief create a null object (implicitly)
+
+ Create a `null` JSON value. This is the implicit version of the `null`
+ value constructor as it takes no parameters.
+
+ @complexity Constant.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+ - As postcondition, it holds: `basic_json().empty() == true`.
+
+ @liveexample{The following code shows the constructor for a `null` JSON
+ value.,basic_json}
+
+ @sa basic_json(std::nullptr_t)
+ */
+ // NOTE(review): being defaulted, this constructor depends on how m_type and
+ // m_value are initialized at their declarations (not visible in this part
+ // of the file) to produce the documented `null` value - confirm.
+ basic_json() noexcept = default;
+
+ /*!
+ @brief create a null object (explicitly)
+
+ Create a `null` JSON value. This is the explicit version of the `null`
+ value constructor as it takes a null pointer as parameter. It allows
+ creating `null` values by explicitly assigning a @c nullptr to a JSON value.
+ The passed null pointer itself is not read - it is only used to choose the
+ right constructor. The work is delegated to the @ref value_t constructor
+ with `value_t::null`.
+
+ @complexity Constant.
+
+ @liveexample{The following code shows the constructor with null pointer
+ parameter.,basic_json__nullptr_t}
+
+ @sa basic_json()
+ */
+ basic_json(std::nullptr_t) noexcept
+ : basic_json(value_t::null)
+ {}
+
+ /*!
+ @brief create an object (explicit)
+
+ Create an object JSON value with a given content.
+
+ @param[in] value a value for the object
+
+ @complexity Linear in the size of the passed @a value.
+
+ @throw std::bad_alloc if allocation for object value fails
+
+ @liveexample{The following code shows the constructor with an @ref object_t
+ parameter.,basic_json__object_t}
+
+ @sa basic_json(const CompatibleObjectType&)
+ */
+ // m_value(value) selects json_value(const object_t&), which stores a newly
+ // allocated copy of the passed object.
+ basic_json(const object_t& value)
+ : m_type(value_t::object), m_value(value)
+ {}
+
+ /*!
+ @brief create an object (implicit)
+
+ Create an object JSON value with a given content. This constructor allows
+ any type that can be used to construct values of type @ref object_t.
+ Examples include the types `std::map` and `std::unordered_map`.
+
+ @tparam CompatibleObjectType an object type whose `key_type` and
+ `value_type` is compatible to @ref object_t
+
+ @param[in] value a value for the object
+
+ @complexity Linear in the size of the passed @a value.
+
+ @throw std::bad_alloc if allocation for object value fails
+
+ @liveexample{The following code shows the constructor with several
+ compatible object type parameters.,basic_json__CompatibleObjectType}
+
+ @sa basic_json(const object_t&)
+ */
+ template <class CompatibleObjectType, typename
+           std::enable_if<
+               std::is_constructible<typename object_t::key_type, typename CompatibleObjectType::key_type>::value and
+               std::is_constructible<basic_json, typename CompatibleObjectType::mapped_type>::value, int>::type
+           = 0>
+ basic_json(const CompatibleObjectType& value)
+     : m_type(value_t::object)
+ {
+     // make the std versions visible while still letting unqualified lookup
+     // find begin/end overloads declared alongside CompatibleObjectType
+     using std::end;
+     using std::begin;
+
+     // build the stored object from the whole range of the source container
+     m_value.object = create<object_t>(begin(value), end(value));
+ }
+
+ /*!
+ @brief create an array (explicit)
+
+ Create an array JSON value with a given content.
+
+ @param[in] value a value for the array
+
+ @complexity Linear in the size of the passed @a value.
+
+ @throw std::bad_alloc if allocation for array value fails
+
+ @liveexample{The following code shows the constructor with an @ref array_t
+ parameter.,basic_json__array_t}
+
+ @sa basic_json(const CompatibleArrayType&)
+ */
+ basic_json(const array_t& value)
+     : m_type(value_t::array),    // tag the value as an array
+       m_value(value)             // payload is initialised from the given array
+ {
+ }
+
+ /*!
+ @brief create an array (implicit)
+
+ Create an array JSON value with a given content. This constructor allows
+ any type that can be used to construct values of type @ref array_t.
+ Examples include the types `std::vector`, `std::list`, and `std::set`.
+
+ @tparam CompatibleArrayType an object type whose `value_type` is compatible
+ to @ref array_t
+
+ @param[in] value a value for the array
+
+ @complexity Linear in the size of the passed @a value.
+
+ @throw std::bad_alloc if allocation for array value fails
+
+ @liveexample{The following code shows the constructor with several
+ compatible array type parameters.,basic_json__CompatibleArrayType}
+
+ @sa basic_json(const array_t&)
+ */
+ template <class CompatibleArrayType, typename
+           std::enable_if<
+               not std::is_same<CompatibleArrayType, typename basic_json_t::iterator>::value and
+               not std::is_same<CompatibleArrayType, typename basic_json_t::const_iterator>::value and
+               not std::is_same<CompatibleArrayType, typename basic_json_t::reverse_iterator>::value and
+               not std::is_same<CompatibleArrayType, typename basic_json_t::const_reverse_iterator>::value and
+               not std::is_same<CompatibleArrayType, typename array_t::iterator>::value and
+               not std::is_same<CompatibleArrayType, typename array_t::const_iterator>::value and
+               std::is_constructible<basic_json, typename CompatibleArrayType::value_type>::value, int>::type
+           = 0>
+ basic_json(const CompatibleArrayType& value)
+     : m_type(value_t::array)
+ {
+     // make the std versions visible while still letting unqualified lookup
+     // find begin/end overloads declared alongside CompatibleArrayType
+     using std::end;
+     using std::begin;
+
+     // build the stored array from the whole range of the source container
+     m_value.array = create<array_t>(begin(value), end(value));
+ }
+
+ /*!
+ @brief create a string (explicit)
+
+ Create a string JSON value with a given content.
+
+ @param[in] value a value for the string
+
+ @complexity Linear in the size of the passed @a value.
+
+ @throw std::bad_alloc if allocation for string value fails
+
+ @liveexample{The following code shows the constructor with an @ref string_t
+ parameter.,basic_json__string_t}
+
+ @sa basic_json(const typename string_t::value_type*)
+ @sa basic_json(const CompatibleStringType&)
+ */
+ basic_json(const string_t& value)
+     : m_type(value_t::string),    // tag the value as a string
+       m_value(value)              // payload is initialised from the given string
+ {
+ }
+
+ /*!
+ @brief create a string (explicit)
+
+ Create a string JSON value with a given content.
+
+ @param[in] value a literal value for the string
+
+ @complexity Linear in the size of the passed @a value.
+
+ @throw std::bad_alloc if allocation for string value fails
+
+ @liveexample{The following code shows the constructor with string literal
+ parameter.,basic_json__string_t_value_type}
+
+ @sa basic_json(const string_t&)
+ @sa basic_json(const CompatibleStringType&)
+ */
+ basic_json(const typename string_t::value_type* value)
+     : basic_json(string_t(value)) // wrap the character pointer in a string_t and delegate
+ {
+ }
+
+ /*!
+ @brief create a string (implicit)
+
+ Create a string JSON value with a given content.
+
+ @param[in] value a value for the string
+
+ @tparam CompatibleStringType a string type which is compatible to @ref
+ string_t
+
+ @complexity Linear in the size of the passed @a value.
+
+ @throw std::bad_alloc if allocation for string value fails
+
+ @liveexample{The following code shows the construction of a string value
+ from a compatible type.,basic_json__CompatibleStringType}
+
+ @sa basic_json(const string_t&)
+ */
+ template <class CompatibleStringType, typename
+           std::enable_if<
+               std::is_constructible<string_t, CompatibleStringType>::value, int>::type
+           = 0>
+ basic_json(const CompatibleStringType& value)
+     : basic_json(string_t(value)) // convert to string_t first, then delegate
+ {
+ }
+
+ /*!
+ @brief create a boolean (explicit)
+
+ Creates a JSON boolean type from a given value.
+
+ @param[in] value a boolean value to store
+
+ @complexity Constant.
+
+ @liveexample{The example below demonstrates boolean
+ values.,basic_json__boolean_t}
+ */
+ basic_json(boolean_t value)
+     : m_type(value_t::boolean),    // tag the value as a boolean
+       m_value(value)               // payload is initialised from the flag
+ {
+ }
+
+ /*!
+ @brief create an integer number (explicit)
+
+ Create an integer number JSON value with a given content.
+
+ @tparam T helper type to compare number_integer_t and int (not visible in
+ the interface).
+
+ @param[in] value an integer to create a JSON number from
+
+ @note This constructor would have the same signature as @ref
+ basic_json(const int value), so we need to switch this one off in case
+ number_integer_t is the same as int. This is done via the helper type @a T.
+
+ @complexity Constant.
+
+ @liveexample{The example below shows the construction of a JSON integer
+ number value.,basic_json__number_integer_t}
+
+ @sa basic_json(const int)
+ */
+ template<typename T,
+          typename std::enable_if<
+              not (std::is_same<T, int>::value)
+              and std::is_same<T, number_integer_t>::value
+              , int>::type = 0>
+ basic_json(const number_integer_t value)
+     : m_type(value_t::number_integer),    // tag the value as an integer number
+       m_value(value)                      // payload is initialised from the integer
+ {
+ }
+
+ /*!
+ @brief create an integer number from an enum type (explicit)
+
+ Create an integer number JSON value with a given content.
+
+ @param[in] value an integer to create a JSON number from
+
+ @note This constructor allows to pass enums directly to a constructor. As
+ C++ has no way of specifying the type of an anonymous enum explicitly, we
+ can only rely on the fact that such values implicitly convert to int. As
+ int may already be the same type of number_integer_t, we may need to switch
+ off the constructor @ref basic_json(const number_integer_t).
+
+ @complexity Constant.
+
+ @liveexample{The example below shows the construction of a JSON integer
+ number value from an anonymous enum.,basic_json__const_int}
+
+ @sa basic_json(const number_integer_t)
+ */
+ basic_json(const int value)
+     : m_type(value_t::number_integer),                 // tag the value as an integer number
+       m_value(static_cast<number_integer_t>(value))    // convert explicitly to the stored integer type
+ {
+ }
+
+ /*!
+ @brief create an integer number (implicit)
+
+ Create an integer number JSON value with a given content. This constructor
+ allows any type that can be used to construct values of type @ref
+ number_integer_t. Examples may include the types `int`, `int32_t`, or
+ `short`.
+
+ @tparam CompatibleNumberIntegerType an integer type which is compatible to
+ @ref number_integer_t.
+
+ @param[in] value an integer to create a JSON number from
+
+ @complexity Constant.
+
+ @liveexample{The example below shows the construction of several JSON
+ integer number values from compatible
+ types.,basic_json__CompatibleIntegerNumberType}
+
+ @sa basic_json(const number_integer_t)
+ */
+ template<typename CompatibleNumberIntegerType, typename
+          std::enable_if<
+              std::is_constructible<number_integer_t, CompatibleNumberIntegerType>::value and
+              std::numeric_limits<CompatibleNumberIntegerType>::is_integer, CompatibleNumberIntegerType>::type
+          = 0>
+ basic_json(const CompatibleNumberIntegerType value) noexcept
+     : m_type(value_t::number_integer),                 // tag the value as an integer number
+       m_value(static_cast<number_integer_t>(value))    // convert explicitly to the stored integer type
+ {
+ }
+
+ /*!
+ @brief create a floating-point number (explicit)
+
+ Create a floating-point number JSON value with a given content.
+
+ @param[in] value a floating-point value to create a JSON number from
+
+ @note RFC 7159 <http://www.rfc-editor.org/rfc/rfc7159.txt>, section 6
+ disallows NaN values:
+ > Numeric values that cannot be represented in the grammar below (such
+ > as Infinity and NaN) are not permitted.
+ In case the parameter @a value is not a number, a JSON null value is
+ created instead.
+
+ @complexity Constant.
+
+ @liveexample{The following example creates several floating-point
+ values.,basic_json__number_float_t}
+ */
+ basic_json(const number_float_t value)
+     : m_type(value_t::number_float),
+       m_value(value)
+ {
+     // finite numbers are stored as given
+     const bool representable = std::isfinite(value);
+     if (representable)
+     {
+         return;
+     }
+
+     // NaN and infinities are replaced by a null value
+     m_value = json_value();
+     m_type = value_t::null;
+ }
+
+ /*!
+ @brief create a floating-point number (implicit)
+
+ Create a floating-point number JSON value with a given content. This
+ constructor allows any type that can be used to construct values of type
+ @ref number_float_t. Examples may include the types `float`.
+
+ @tparam CompatibleNumberFloatType a floating-point type which is compatible
+ to @ref number_float_t.
+
+ @param[in] value a floating-point to create a JSON number from
+
+ @note RFC 7159 <http://www.rfc-editor.org/rfc/rfc7159.txt>, section 6
+ disallows NaN values:
+ > Numeric values that cannot be represented in the grammar below (such
+ > as Infinity and NaN) are not permitted.
+ In case the parameter @a value is not a number, a JSON null value is
+ created instead.
+
+ @complexity Constant.
+
+ @liveexample{The example below shows the construction of several JSON
+ floating-point number values from compatible
+ types.,basic_json__CompatibleNumberFloatType}
+
+ @sa basic_json(const number_float_t)
+ */
+ template<typename CompatibleNumberFloatType, typename = typename
+          std::enable_if<
+              std::is_constructible<number_float_t, CompatibleNumberFloatType>::value and
+              std::is_floating_point<CompatibleNumberFloatType>::value>::type
+          >
+ basic_json(const CompatibleNumberFloatType value) noexcept
+     : basic_json(number_float_t(value)) // convert to number_float_t first, then delegate
+ {
+ }
+
+ /*!
+ @brief create a container (array or object) from an initializer list
+
+ Creates a JSON value of type array or object from the passed initializer
+ list @a init. In case @a type_deduction is `true` (default), the type of
+ the JSON value to be created is deduced from the initializer list @a init
+ according to the following rules:
+
+ 1. If the list is empty, an empty JSON object value `{}` is created.
+ 2. If the list consists of pairs whose first element is a string, a JSON
+ object value is created where the first elements of the pairs are treated
+ as keys and the second elements are as values.
+ 3. In all other cases, an array is created.
+
+ The rules aim to create the best fit between a C++ initializer list and
+ JSON values. The rationale is as follows:
+
+ 1. The empty initializer list is written as `{}` which is exactly an empty
+ JSON object.
+ 2. C++ has no way of describing mapped types other than to list a list of
+ pairs. As JSON requires that keys must be of type string, rule 2 is the
+ weakest constraint one can pose on initializer lists to interpret them as
+ an object.
+ 3. In all other cases, the initializer list could not be interpreted as
+ JSON object type, so interpreting it as JSON array type is safe.
+
+ With the rules described above, the following JSON values cannot be
+ expressed by an initializer list:
+
+ - the empty array (`[]`): use @ref array(std::initializer_list<basic_json>)
+ with an empty initializer list in this case
+ - arrays whose elements satisfy rule 2: use @ref
+ array(std::initializer_list<basic_json>) with the same initializer list
+ in this case
+
+ @note When used without parentheses around an empty initializer list, @ref
+ basic_json() is called instead of this function, yielding the JSON null
+ value.
+
+ @param[in] init initializer list with JSON values
+
+ @param[in] type_deduction internal parameter; when set to `true`, the type
+ of the JSON value is deduced from the initializer list @a init; when set
+ to `false`, the type provided via @a manual_type is forced. This mode is
+ used by the functions @ref array(std::initializer_list<basic_json>) and
+ @ref object(std::initializer_list<basic_json>).
+
+ @param[in] manual_type internal parameter; when @a type_deduction is set to
+ `false`, the created JSON value will use the provided type (only @ref
+ value_t::array and @ref value_t::object are valid); when @a type_deduction
+ is set to `true`, this parameter has no effect
+
+ @throw std::domain_error if @a type_deduction is `false`, @a manual_type is
+ `value_t::object`, but @a init contains an element which is not a pair
+ whose first element is a string
+
+ @complexity Linear in the size of the initializer list @a init.
+
+ @liveexample{The example below shows how JSON values are created from
+ initializer lists,basic_json__list_init_t}
+
+ @sa basic_json array(std::initializer_list<basic_json>) - create a JSON
+ array value from an initializer list
+ @sa basic_json object(std::initializer_list<basic_json>) - create a JSON
+ object value from an initializer list
+ */
+ basic_json(std::initializer_list<basic_json> init,
+            bool type_deduction = true,
+            value_t manual_type = value_t::array)
+ {
+     // an element can serve as a key/value pair only if it is a two-element
+     // array whose first entry is a string
+     const auto is_key_value_pair = [](const basic_json& candidate)
+     {
+         return candidate.m_type == value_t::array
+                and candidate.size() == 2
+                and candidate[0].m_type == value_t::string;
+     };
+
+     // an object is possible only if every element qualifies; an empty list
+     // qualifies trivially
+     bool is_object = std::all_of(init.begin(), init.end(), is_key_value_pair);
+
+     // honour an explicitly requested type when deduction is switched off
+     if (not type_deduction)
+     {
+         switch (manual_type)
+         {
+             case value_t::array:
+             {
+                 // an array is wanted, even though an object may be possible
+                 is_object = false;
+                 break;
+             }
+
+             case value_t::object:
+             {
+                 // an object is wanted but the list does not allow it
+                 if (not is_object)
+                 {
+                     throw std::domain_error("cannot create object from initializer list");
+                 }
+                 break;
+             }
+
+             default:
+             {
+                 break;
+             }
+         }
+     }
+
+     if (not is_object)
+     {
+         // the initializer list describes an array
+         m_type = value_t::array;
+         m_value.array = create<array_t>(std::move(init));
+         return;
+     }
+
+     // the initializer list is a list of pairs: first entry is the key,
+     // second entry is the mapped value
+     m_type = value_t::object;
+     m_value = value_t::object;
+
+     for (auto pair_it = init.begin(); pair_it != init.end(); ++pair_it)
+     {
+         auto& element = *pair_it;
+         m_value.object->emplace(std::move(*(element[0].m_value.string)), std::move(element[1]));
+     }
+ }
+
+ /*!
+ @brief explicitly create an array from an initializer list
+
+ Creates a JSON array value from a given initializer list. That is, given a
+ list of values `a, b, c`, creates the JSON value `[a, b, c]`. If the
+ initializer list is empty, the empty array `[]` is created.
+
+ @note This function is only needed to express two edge cases that cannot be
+ realized with the initializer list constructor (@ref
+ basic_json(std::initializer_list<basic_json>, bool, value_t)). These cases
+ are:
+ 1. creating an array whose elements are all pairs whose first element is a
+ string - in this case, the initializer list constructor would create an
+ object, taking the first elements as keys
+ 2. creating an empty array - passing the empty initializer list to the
+ initializer list constructor yields an empty object
+
+ @param[in] init initializer list with JSON values to create an array from
+ (optional)
+
+ @return JSON array value
+
+ @complexity Linear in the size of @a init.
+
+ @liveexample{The following code shows an example for the @ref array
+ function.,array}
+
+ @sa basic_json(std::initializer_list<basic_json>, bool, value_t) - create a
+ JSON value from an initializer list
+ @sa basic_json object(std::initializer_list<basic_json>) - create a JSON
+ object value from an initializer list
+ */
+ static basic_json array(std::initializer_list<basic_json> init =
+                             std::initializer_list<basic_json>())
+ {
+     // switch type deduction off and force the array interpretation
+     const bool deduce_type = false;
+     return basic_json(init, deduce_type, value_t::array);
+ }
+
+ /*!
+ @brief explicitly create an object from an initializer list
+
+ Creates a JSON object value from a given initializer list. The initializer
+ list's elements must be pairs, and their first elements must be strings. If
+ the initializer list is empty, the empty object `{}` is created.
+
+ @note This function is only added for symmetry reasons. In contrast to the
+ related function @ref basic_json array(std::initializer_list<basic_json>),
+ there are no cases which can only be expressed by this function. That is,
+ any initializer list @a init can also be passed to the initializer list
+ constructor @ref basic_json(std::initializer_list<basic_json>, bool,
+ value_t).
+
+ @param[in] init initializer list to create an object from (optional)
+
+ @return JSON object value
+
+ @throw std::domain_error if @a init is not a pair whose first elements are
+ strings; thrown by @ref basic_json(std::initializer_list<basic_json>, bool,
+ value_t)
+
+ @complexity Linear in the size of @a init.
+
+ @liveexample{The following code shows an example for the @ref object
+ function.,object}
+
+ @sa basic_json(std::initializer_list<basic_json>, bool, value_t) - create a
+ JSON value from an initializer list
+ @sa basic_json array(std::initializer_list<basic_json>) - create a JSON
+ array value from an initializer list
+ */
+ static basic_json object(std::initializer_list<basic_json> init =
+                              std::initializer_list<basic_json>())
+ {
+     // switch type deduction off and force the object interpretation
+     const bool deduce_type = false;
+     return basic_json(init, deduce_type, value_t::object);
+ }
+
+ /*!
+ @brief construct an array with count copies of given value
+
+ Constructs a JSON array value by creating @a count copies of a passed
+ value. In case @a count is `0`, an empty array is created. As postcondition,
+ `std::distance(begin(),end()) == count` holds.
+
+ @param[in] count the number of JSON copies of @a value to create
+ @param[in] value the JSON value to copy
+
+ @complexity Linear in @a count.
+
+ @liveexample{The following code shows examples for the @ref
+ basic_json(size_type\, const basic_json&)
+ constructor.,basic_json__size_type_basic_json}
+ */
+ basic_json(size_type count, const basic_json& value) : m_type(value_t::array)
+ {
+     // the stored array is created from the (count, value) pair
+     m_value.array = create<array_t>(count, value);
+ }
+
+ /*!
+ @brief construct a JSON container given an iterator range
+
+ Constructs the JSON value with the contents of the range `[first, last)`.
+ The semantics depends on the different types a JSON value can have:
+ - In case of primitive types (number, boolean, or string), @a first must
+ be `begin()` and @a last must be `end()`. In this case, the value is
+ copied. Otherwise, std::out_of_range is thrown.
+ - In case of structured types (array, object), the constructor behaves
+ as similar versions for `std::vector`.
+ - In case of a null type, std::domain_error is thrown.
+
+ @tparam InputIT an input iterator type (@ref iterator or @ref
+ const_iterator)
+
+ @param[in] first begin of the range to copy from (included)
+ @param[in] last end of the range to copy from (excluded)
+
+ @throw std::domain_error if iterators are not compatible; that is, do not
+ belong to the same JSON value
+ @throw std::out_of_range if iterators are for a primitive type (number,
+ boolean, or string) where an out of range error can be detected easily
+ @throw std::bad_alloc if allocation for object, array, or string fails
+ @throw std::domain_error if called with a null value
+
+ @complexity Linear in distance between @a first and @a last.
+
+ @liveexample{The example below shows several ways to create JSON values by
+ specifying a subrange with iterators.,basic_json__InputIt_InputIt}
+ */
+ template <class InputIT, typename
+           std::enable_if<
+               std::is_same<InputIT, typename basic_json_t::iterator>::value or
+               std::is_same<InputIT, typename basic_json_t::const_iterator>::value
+               , int>::type
+           = 0>
+ basic_json(InputIT first, InputIT last)
+     : m_type(first.m_object->m_type)
+ {
+     // both iterators must refer to the same JSON value
+     if (first.m_object != last.m_object)
+     {
+         throw std::domain_error("iterators are not compatible");
+     }
+
+     // for primitive values only the complete range [begin, end) is accepted
+     const bool holds_primitive =
+         m_type == value_t::number_integer or
+         m_type == value_t::number_float or
+         m_type == value_t::boolean or
+         m_type == value_t::string;
+
+     if (holds_primitive)
+     {
+         const bool covers_whole_value = first.m_it.primitive_iterator.is_begin()
+                                         and last.m_it.primitive_iterator.is_end();
+         if (not covers_whole_value)
+         {
+             throw std::out_of_range("iterators out of range");
+         }
+     }
+
+     // the value the iterators point into
+     auto& source = *first.m_object;
+
+     switch (m_type)
+     {
+         // structured types: build a new container from the sub-range
+         case value_t::array:
+         {
+             m_value.array = create<array_t>(first.m_it.array_iterator, last.m_it.array_iterator);
+             break;
+         }
+
+         case value_t::object:
+         {
+             m_value.object = create<object_t>(first.m_it.object_iterator, last.m_it.object_iterator);
+             break;
+         }
+
+         // primitive types: take over the single value
+         case value_t::string:
+         {
+             m_value = *source.m_value.string;
+             break;
+         }
+
+         case value_t::boolean:
+         {
+             m_value.boolean = source.m_value.boolean;
+             break;
+         }
+
+         case value_t::number_float:
+         {
+             m_value.number_float = source.m_value.number_float;
+             break;
+         }
+
+         case value_t::number_integer:
+         {
+             m_value.number_integer = source.m_value.number_integer;
+             break;
+         }
+
+         // any remaining type cannot be constructed from an iterator range
+         default:
+         {
+             throw std::domain_error("cannot use construct with iterators from " + source.type_name());
+         }
+     }
+ }
+
+ ///////////////////////////////////////
+ // other constructors and destructor //
+ ///////////////////////////////////////
+
+ /*!
+ @brief copy constructor
+
+ Creates a copy of a given JSON value.
+
+ @param[in] other the JSON value to copy
+
+ @complexity Linear in the size of @a other.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is linear.
+ - As postcondition, it holds: `other == basic_json(other)`.
+
+ @throw std::bad_alloc if allocation for object, array, or string fails.
+
+ @liveexample{The following code shows an example for the copy
+ constructor.,basic_json__basic_json}
+ */
+ basic_json(const basic_json& other) : m_type(other.m_type)
+ {
+     const auto& source = other.m_value;
+
+     switch (m_type)
+     {
+         // values stored directly in the payload
+         case value_t::number_float:
+         {
+             m_value = source.number_float;
+             break;
+         }
+
+         case value_t::number_integer:
+         {
+             m_value = source.number_integer;
+             break;
+         }
+
+         case value_t::boolean:
+         {
+             m_value = source.boolean;
+             break;
+         }
+
+         // values held through a pointer: assign from the pointee, not
+         // from the pointer itself
+         case value_t::string:
+         {
+             m_value = *source.string;
+             break;
+         }
+
+         case value_t::array:
+         {
+             m_value = *source.array;
+             break;
+         }
+
+         case value_t::object:
+         {
+             m_value = *source.object;
+             break;
+         }
+
+         // no payload is copied for these types
+         case value_t::discarded:
+         case value_t::null:
+         {
+             break;
+         }
+     }
+ }
+
+ /*!
+ @brief move constructor
+
+ Move constructor. Constructs a JSON value with the contents of the given
+ value @a other using move semantics. It "steals" the resources from @a
+ other and leaves it as JSON null value.
+
+ @param[in,out] other value to move to this object
+
+ @post @a other is a JSON null value
+
+ @complexity Constant.
+
+ @liveexample{The code below shows the move constructor explicitly called
+ via std::move.,basic_json__moveconstructor}
+ */
+ basic_json(basic_json&& other) noexcept
+     : m_type(std::move(other.m_type)), m_value(std::move(other.m_value))
+ {
+     // reset the source so that it no longer refers to the payload taken above
+     other.m_value = {};
+     other.m_type = value_t::null;
+ }
+
+ /*!
+ @brief copy assignment
+
+ Copy assignment operator. Copies a JSON value via the "copy and swap"
+ strategy: It is expressed in terms of the copy constructor, destructor, and
+ the swap() member function.
+
+ @param[in] other value to copy from
+
+ @complexity Linear.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is linear.
+
+ @liveexample{The code below shows an example for the copy assignment. It
+ creates a copy of value `a` which is then swapped with `b`. Finally\, the
+ copy of `a` (which is the null value after the swap) is
+ destroyed.,basic_json__copyassignment}
+ */
+ reference& operator=(basic_json other) noexcept (
+     std::is_nothrow_move_constructible<value_t>::value and
+     std::is_nothrow_move_assignable<value_t>::value and
+     std::is_nothrow_move_constructible<json_value>::value and
+     std::is_nothrow_move_assignable<json_value>::value
+ )
+ {
+     // 'other' is taken by value: exchange both members with it and let the
+     // parameter carry the previous state of *this away
+     using std::swap;
+     swap(m_value, other.m_value);
+     swap(m_type, other.m_type);
+
+     return *this;
+ }
+
+ /*!
+ @brief destructor
+
+ Destroys the JSON value and frees all allocated memory.
+
+ @complexity Linear.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is linear.
+ - All stored elements are destroyed and all memory is freed.
+ */
+ ~basic_json()
+ {
+     // Object, array and string payloads are held through a pointer in
+     // m_value; each one is destroyed and its storage released through an
+     // allocator of the matching type. All other types need no clean-up.
+     //
+     // The calls go through std::allocator_traits instead of the allocator's
+     // own destroy()/deallocate() members: std::allocator::destroy is
+     // deprecated in C++17 and removed in C++20, and an allocator is not
+     // required to provide a destroy member at all. allocator_traits forwards
+     // to the member when it exists and otherwise falls back to invoking the
+     // destructor directly.
+     switch (m_type)
+     {
+         case (value_t::object):
+         {
+             AllocatorType<object_t> alloc;
+             using traits = std::allocator_traits<AllocatorType<object_t>>;
+             traits::destroy(alloc, m_value.object);
+             traits::deallocate(alloc, m_value.object, 1);
+             break;
+         }
+
+         case (value_t::array):
+         {
+             AllocatorType<array_t> alloc;
+             using traits = std::allocator_traits<AllocatorType<array_t>>;
+             traits::destroy(alloc, m_value.array);
+             traits::deallocate(alloc, m_value.array, 1);
+             break;
+         }
+
+         case (value_t::string):
+         {
+             AllocatorType<string_t> alloc;
+             using traits = std::allocator_traits<AllocatorType<string_t>>;
+             traits::destroy(alloc, m_value.string);
+             traits::deallocate(alloc, m_value.string, 1);
+             break;
+         }
+
+         default:
+         {
+             // all other types need no specific destructor
+             break;
+         }
+     }
+ }
+
+
+ public:
+ ///////////////////////
+ // object inspection //
+ ///////////////////////
+
+ /// @name object inspection
+ /// @{
+
+ /*!
+ @brief serialization
+
+ Serialization function for JSON values. The function tries to mimic
+ Python's @p json.dumps() function, and currently supports its @p indent
+ parameter.
+
+ @param[in] indent if indent is nonnegative, then array elements and object
+ members will be pretty-printed with that indent level. An indent level of 0
+ will only insert newlines. -1 (the default) selects the most compact
+ representation
+
+ @return string containing the serialization of the JSON value
+
+ @complexity Linear.
+
+ @liveexample{The following example shows the effect of different @a indent
+ parameters to the result of the serialization.,dump}
+
+ @see https://docs.python.org/2/library/json.html#json.dump
+ */
+ string_t dump(const int indent = -1) const
+ {
+     std::stringstream buffer;
+
+     if (indent < 0)
+     {
+         // negative indent: pretty-printing switched off
+         dump(buffer, false, 0);
+     }
+     else
+     {
+         // non-negative indent: pretty-print with that indent width
+         dump(buffer, true, static_cast<unsigned int>(indent));
+     }
+
+     return buffer.str();
+ }
+
+ /*!
+ @brief return the type of the JSON value (explicit)
+
+ Return the type of the JSON value as a value from the @ref value_t
+ enumeration.
+
+ @return the type of the JSON value
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref type() for all JSON
+ types.,type}
+ */
+ value_t type() const noexcept
+ {
+     // hand out the stored type tag
+     const value_t current = m_type;
+     return current;
+ }
+
+ /*!
+ @brief return whether type is primitive
+
+ This function returns true iff the JSON type is primitive (string, number,
+ boolean, or null).
+
+ @return `true` if type is primitive (string, number, boolean, or null),
+ `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_primitive for all JSON
+ types.,is_primitive}
+ */
+ bool is_primitive() const noexcept
+ {
+     // primitive means null, string, boolean, or any number
+     if (is_null())
+     {
+         return true;
+     }
+     if (is_string())
+     {
+         return true;
+     }
+     if (is_boolean())
+     {
+         return true;
+     }
+     return is_number();
+ }
+
+ /*!
+ @brief return whether type is structured
+
+ This function returns true iff the JSON type is structured (array or
+ object).
+
+ @return `true` if type is structured (array or object), `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_structured for all JSON
+ types.,is_structured}
+ */
+ bool is_structured() const noexcept
+ {
+     // structured means array or object
+     if (is_array())
+     {
+         return true;
+     }
+     return is_object();
+ }
+
+ /*!
+ @brief return whether value is null
+
+ This function returns true iff the JSON value is null.
+
+ @return `true` if type is null, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_null for all JSON
+ types.,is_null}
+ */
+ bool is_null() const noexcept
+ {
+     // compare the stored type tag against the null tag
+     return value_t::null == m_type;
+ }
+
+ /*!
+ @brief return whether value is a boolean
+
+ This function returns true iff the JSON value is a boolean.
+
+ @return `true` if type is boolean, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_boolean for all JSON
+ types.,is_boolean}
+ */
+ bool is_boolean() const noexcept
+ {
+     // compare the stored type tag against the boolean tag
+     return value_t::boolean == m_type;
+ }
+
+ /*!
+ @brief return whether value is a number
+
+ This function returns true iff the JSON value is a number. This includes
+ both integer and floating-point values.
+
+ @return `true` if type is number, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_number for all JSON
+ types.,is_number}
+ */
+ bool is_number() const noexcept
+ {
+     // a number is either an integer or a floating-point value
+     if (is_number_integer())
+     {
+         return true;
+     }
+     return is_number_float();
+ }
+
+ /*!
+ @brief return whether value is an integer number
+
+ This function returns true iff the JSON value is an integer number. This
+ excludes floating-point values.
+
+ @return `true` if type is an integer number, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_number_integer for all
+ JSON types.,is_number_integer}
+ */
+ bool is_number_integer() const noexcept
+ {
+     // compare the stored type tag against the integer-number tag
+     return value_t::number_integer == m_type;
+ }
+
+ /*!
+ @brief return whether value is a floating-point number
+
+ This function returns true iff the JSON value is a floating-point number.
+ This excludes integer values.
+
+ @return `true` if type is a floating-point number, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_number_float for all
+ JSON types.,is_number_float}
+ */
+ bool is_number_float() const noexcept
+ {
+     // compare the stored type tag against the floating-point-number tag
+     return value_t::number_float == m_type;
+ }
+
+ /*!
+ @brief return whether value is an object
+
+ This function returns true iff the JSON value is an object.
+
+ @return `true` if type is object, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_object for all JSON
+ types.,is_object}
+ */
+ bool is_object() const noexcept
+ {
+     // compare the stored type tag against the object tag
+     return value_t::object == m_type;
+ }
+
+ /*!
+ @brief return whether value is an array
+
+ This function returns true iff the JSON value is an array.
+
+ @return `true` if type is array, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_array for all JSON
+ types.,is_array}
+ */
+ bool is_array() const noexcept
+ {
+     // compare the stored type tag against the array tag
+     return value_t::array == m_type;
+ }
+
+ /*!
+ @brief return whether value is a string
+
+ This function returns true iff the JSON value is a string.
+
+ @return `true` if type is string, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_string for all JSON
+ types.,is_string}
+ */
+ bool is_string() const noexcept
+ {
+     // compare the stored type tag against the string tag
+     return value_t::string == m_type;
+ }
+
+ /*!
+ @brief return whether value is discarded
+
+ This function returns true iff the JSON value was discarded during parsing
+ with a callback function (see @ref parser_callback_t).
+
+ @note This function will always be `false` for JSON values after parsing.
+ That is, discarded values can only occur during parsing, but will be
+ removed when inside a structured value or replaced by null in other cases.
+
+ @return `true` if type is discarded, `false` otherwise.
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies @ref is_discarded for all JSON
+ types.,is_discarded}
+ */
+ bool is_discarded() const noexcept
+ {
+     // compare the stored type tag against the discarded tag
+     return value_t::discarded == m_type;
+ }
+
+ /*!
+ @brief return the type of the JSON value (implicit)
+
+ Implicitly return the type of the JSON value as a value from the @ref
+ value_t enumeration.
+
+ @return the type of the JSON value
+
+ @complexity Constant.
+
+ @liveexample{The following code exemplifies the value_t operator for all
+ JSON types.,operator__value_t}
+ */
+ operator value_t() const noexcept
+ {
+     // expose the stored type tag unchanged
+     const value_t current_type = m_type;
+     return current_type;
+ }
+
+ /// @}
+
+ private:
+ //////////////////
+ // value access //
+ //////////////////
+
+ /// convert a JSON object into a compatible associative container (explicit)
+ template <class T, typename
+           std::enable_if<
+               std::is_convertible<typename object_t::key_type, typename T::key_type>::value and
+               std::is_convertible<basic_json_t, typename T::mapped_type>::value
+               , int>::type = 0>
+ T get_impl(T*) const
+ {
+     // anything but an object cannot be converted
+     if (m_type != value_t::object)
+     {
+         throw std::domain_error("type must be object, but is " + type_name());
+     }
+
+     // build the target container from the stored key/value range
+     return T(m_value.object->begin(), m_value.object->end());
+ }
+
+ /// return a copy of the stored object (explicit)
+ object_t get_impl(object_t*) const
+ {
+     // anything but an object cannot be returned as object_t
+     if (m_type != value_t::object)
+     {
+         throw std::domain_error("type must be object, but is " + type_name());
+     }
+
+     return *(m_value.object);
+ }
+
+ /// convert a JSON array into a compatible container by converting each
+ /// element to the container's value_type (explicit)
+ template <class T, typename
+           std::enable_if<
+               std::is_convertible<basic_json_t, typename T::value_type>::value and
+               not std::is_same<basic_json_t, typename T::value_type>::value and
+               not std::is_arithmetic<T>::value and
+               not std::is_convertible<std::string, T>::value and
+               not has_mapped_type<T>::value
+               , int>::type = 0>
+ T get_impl(T*) const
+ {
+     switch (m_type)
+     {
+         case (value_t::array):
+         {
+             T to_vector;
+             // take each element by const reference: the conversion only reads
+             // it, so copying the element (and everything below it) is wasted work
+             std::transform(m_value.array->begin(), m_value.array->end(),
+                            std::inserter(to_vector, to_vector.end()), [](const basic_json & i)
+             {
+                 return i.get<typename T::value_type>();
+             });
+             return to_vector;
+         }
+         default:
+         {
+             throw std::domain_error("type must be array, but is " + type_name());
+         }
+     }
+ }
+
+ /// convert a JSON array into a std::vector<T> by converting each element to T
+ /// (explicit)
+ template <class T, typename
+           std::enable_if<
+               std::is_convertible<basic_json_t, T>::value and
+               not std::is_same<basic_json_t, T>::value
+               , int>::type = 0>
+ std::vector<T> get_impl(std::vector<T>*) const
+ {
+     switch (m_type)
+     {
+         case (value_t::array):
+         {
+             std::vector<T> to_vector;
+             // the final size is known up front, so allocate once
+             to_vector.reserve(m_value.array->size());
+             // take each element by const reference: the conversion only reads
+             // it, so copying the element (and everything below it) is wasted work
+             std::transform(m_value.array->begin(), m_value.array->end(),
+                            std::inserter(to_vector, to_vector.end()), [](const basic_json & i)
+             {
+                 return i.get<T>();
+             });
+             return to_vector;
+         }
+         default:
+         {
+             throw std::domain_error("type must be array, but is " + type_name());
+         }
+     }
+ }
+
+ /// copy a JSON array into a container whose elements are JSON values (explicit)
+ template <class T, typename
+           std::enable_if<
+               std::is_same<basic_json, typename T::value_type>::value and
+               not has_mapped_type<T>::value
+               , int>::type = 0>
+ T get_impl(T*) const
+ {
+     // anything but an array cannot be converted
+     if (m_type != value_t::array)
+     {
+         throw std::domain_error("type must be array, but is " + type_name());
+     }
+
+     // no per-element conversion needed: construct from the stored range
+     return T(m_value.array->begin(), m_value.array->end());
+ }
+
+ /// return a copy of the stored array (explicit)
+ array_t get_impl(array_t*) const
+ {
+     // anything but an array cannot be returned as array_t
+     if (m_type != value_t::array)
+     {
+         throw std::domain_error("type must be array, but is " + type_name());
+     }
+
+     return *(m_value.array);
+ }
+
+ /// convert the stored string into any type string_t is convertible to (explicit)
+ template <typename T, typename
+           std::enable_if<
+               std::is_convertible<string_t, T>::value
+               , int>::type = 0>
+ T get_impl(T*) const
+ {
+     // anything but a string cannot be converted
+     if (m_type != value_t::string)
+     {
+         throw std::domain_error("type must be string, but is " + type_name());
+     }
+
+     // implicit conversion from string_t to T happens on return
+     return *m_value.string;
+ }
+
+ /// convert the stored number (integer or floating-point) to an arithmetic type
+ /// (explicit)
+ template<typename T, typename
+          std::enable_if<
+              std::is_arithmetic<T>::value
+              , int>::type = 0>
+ T get_impl(T*) const
+ {
+     // integers are cast from the integer member ...
+     if (m_type == value_t::number_integer)
+     {
+         return static_cast<T>(m_value.number_integer);
+     }
+
+     // ... floating-point numbers from the float member
+     if (m_type == value_t::number_float)
+     {
+         return static_cast<T>(m_value.number_float);
+     }
+
+     // every other type is not a number
+     throw std::domain_error("type must be number, but is " + type_name());
+ }
+
+ /// return the stored boolean (explicit)
+ boolean_t get_impl(boolean_t*) const
+ {
+     // anything but a boolean cannot be returned as boolean_t
+     if (m_type != value_t::boolean)
+     {
+         throw std::domain_error("type must be boolean, but is " + type_name());
+     }
+
+     return m_value.boolean;
+ }
+
+ /// pointer to the stored object, or nullptr if this value is not an object
+ object_t* get_impl_ptr(object_t*) noexcept
+ {
+     if (is_object())
+     {
+         return m_value.object;
+     }
+     return nullptr;
+ }
+
+ /// const pointer to the stored object, or nullptr if this value is not an object
+ const object_t* get_impl_ptr(const object_t*) const noexcept
+ {
+     if (is_object())
+     {
+         return m_value.object;
+     }
+     return nullptr;
+ }
+
+ /// pointer to the stored array, or nullptr if this value is not an array
+ array_t* get_impl_ptr(array_t*) noexcept
+ {
+     if (is_array())
+     {
+         return m_value.array;
+     }
+     return nullptr;
+ }
+
+ /// const pointer to the stored array, or nullptr if this value is not an array
+ const array_t* get_impl_ptr(const array_t*) const noexcept
+ {
+     if (is_array())
+     {
+         return m_value.array;
+     }
+     return nullptr;
+ }
+
+ /// pointer to the stored string, or nullptr if this value is not a string
+ string_t* get_impl_ptr(string_t*) noexcept
+ {
+     if (is_string())
+     {
+         return m_value.string;
+     }
+     return nullptr;
+ }
+
+ /// const pointer to the stored string, or nullptr if this value is not a string
+ const string_t* get_impl_ptr(const string_t*) const noexcept
+ {
+     if (is_string())
+     {
+         return m_value.string;
+     }
+     return nullptr;
+ }
+
+ /// pointer to the stored boolean, or nullptr if this value is not a boolean
+ boolean_t* get_impl_ptr(boolean_t*) noexcept
+ {
+     if (is_boolean())
+     {
+         return &m_value.boolean;
+     }
+     return nullptr;
+ }
+
+ /// const pointer to the stored boolean, or nullptr if this value is not a boolean
+ const boolean_t* get_impl_ptr(const boolean_t*) const noexcept
+ {
+     if (is_boolean())
+     {
+         return &m_value.boolean;
+     }
+     return nullptr;
+ }
+
+ /// pointer to the stored integer, or nullptr if this value is not an integer number
+ number_integer_t* get_impl_ptr(number_integer_t*) noexcept
+ {
+     if (is_number_integer())
+     {
+         return &m_value.number_integer;
+     }
+     return nullptr;
+ }
+
+ /// const pointer to the stored integer, or nullptr if this value is not an integer number
+ const number_integer_t* get_impl_ptr(const number_integer_t*) const noexcept
+ {
+     if (is_number_integer())
+     {
+         return &m_value.number_integer;
+     }
+     return nullptr;
+ }
+
+ /// pointer to the stored float, or nullptr if this value is not a floating-point number
+ number_float_t* get_impl_ptr(number_float_t*) noexcept
+ {
+     if (is_number_float())
+     {
+         return &m_value.number_float;
+     }
+     return nullptr;
+ }
+
+ /// const pointer to the stored float, or nullptr if this value is not a floating-point number
+ const number_float_t* get_impl_ptr(const number_float_t*) const noexcept
+ {
+     if (is_number_float())
+     {
+         return &m_value.number_float;
+     }
+     return nullptr;
+ }
+
+ public:
+
+ /// @name value access
+ /// @{
+
+ /*!
+ @brief get a value (explicit)
+
+ Explicit type conversion between the JSON value and a compatible value.
+
+ @tparam ValueType non-pointer type compatible to the JSON value, for
+ instance `int` for JSON integer numbers, `bool` for JSON booleans, or
+ `std::vector` types for JSON arrays
+
+ @return copy of the JSON value, converted to type @a ValueType
+
+ @throw std::domain_error in case passed type @a ValueType is incompatible
+ to JSON
+
+ @complexity Linear in the size of the JSON value.
+
+ @liveexample{The example below shows several conversions from JSON values
+ to other types. There are a few things to note: (1) Floating-point numbers can
+ be converted to integers\, (2) A JSON array can be converted to a standard
+ `std::vector<short>`\, (3) A JSON object can be converted to C++
+ associative containers such as `std::unordered_map<std::string\,
+ json>`.,get__ValueType_const}
+
+ @internal
+ The idea of using a casted null pointer to choose the correct
+ implementation is from <http://stackoverflow.com/a/8315197/266378>.
+ @endinternal
+
+ @sa @ref operator ValueType() const for implicit conversion
+ @sa @ref get() for pointer-member access
+ */
+ template<typename ValueType, typename
+          std::enable_if<
+              not std::is_pointer<ValueType>::value
+              , int>::type = 0>
+ ValueType get() const
+ {
+     // a typed null pointer serves purely as a tag that selects the matching
+     // get_impl overload; it is never dereferenced here
+     ValueType* const overload_tag = nullptr;
+     return get_impl(overload_tag);
+ }
+
+ /*!
+ @brief get a pointer value (explicit)
+
+ Explicit pointer access to the internally stored JSON value. No copies are
+ made.
+
+ @warning Writing data to the pointee of the result yields an undefined
+ state.
+
+ @tparam PointerType pointer type; must be a pointer to @ref array_t, @ref
+ object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, or @ref
+ number_float_t.
+
+ @return pointer to the internally stored JSON value if the requested pointer
+ type @a PointerType fits to the JSON value; `nullptr` otherwise
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how pointers to internal values of a
+ JSON value can be requested. Note that no type conversions are made and a
+ `nullptr` is returned if the value and the requested pointer type does not
+ match.,get__PointerType}
+
+ @sa @ref get_ptr() for explicit pointer-member access
+ */
+ template<typename PointerType, typename
+          std::enable_if<
+              std::is_pointer<PointerType>::value
+              , int>::type = 0>
+ PointerType get() noexcept
+ {
+     // pointer requests are answered by get_ptr; this overload only forwards
+     PointerType stored = get_ptr<PointerType>();
+     return stored;
+ }
+
+ /*!
+ @brief get a pointer value (explicit)
+ @copydoc get()
+ */
+ template<typename PointerType, typename
+          std::enable_if<
+              std::is_pointer<PointerType>::value
+              , int>::type = 0>
+ const PointerType get() const noexcept
+ {
+     // pointer requests on a const value are answered by the const get_ptr;
+     // this overload only forwards
+     const PointerType stored = get_ptr<PointerType>();
+     return stored;
+ }
+
+ /*!
+ @brief get a pointer value (implicit)
+
+ Implicit pointer access to the internally stored JSON value. No copies are
+ made.
+
+ @warning Writing data to the pointee of the result yields an undefined
+ state.
+
+ @tparam PointerType pointer type; must be a pointer to @ref array_t, @ref
+ object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, or @ref
+ number_float_t.
+
+ @return pointer to the internally stored JSON value if the requested pointer
+ type @a PointerType fits to the JSON value; `nullptr` otherwise
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how pointers to internal values of a
+ JSON value can be requested. Note that no type conversions are made and a
+ `nullptr` is returned if the value and the requested pointer type does not
+ match.,get_ptr}
+ */
+ template<typename PointerType, typename
+          std::enable_if<
+              std::is_pointer<PointerType>::value
+              , int>::type = 0>
+ PointerType get_ptr() noexcept
+ {
+     // a typed null pointer serves purely as a tag that selects the matching
+     // get_impl_ptr overload
+     PointerType overload_tag = nullptr;
+     return get_impl_ptr(overload_tag);
+ }
+
+ /*!
+ @brief get a pointer value (implicit)
+ @copydoc get_ptr()
+ */
+ template<typename PointerType, typename
+          std::enable_if<
+              std::is_pointer<PointerType>::value
+              // the *pointee* must be const: std::is_const<PointerType> would test
+              // the pointer object itself (T* const) and reject every const T*
+              and std::is_const<typename std::remove_pointer<PointerType>::type>::value
+              , int>::type = 0>
+ const PointerType get_ptr() const noexcept
+ {
+     // a typed null pointer-to-const serves purely as a tag that selects the
+     // const get_impl_ptr overload
+     return get_impl_ptr(static_cast<const PointerType>(nullptr));
+ }
+
+ /*!
+ @brief get a value (implicit)
+
+ Implicit type conversion between the JSON value and a compatible value. The
+ call is realized by calling @ref get() const.
+
+ @tparam ValueType non-pointer type compatible to the JSON value, for
+ instance `int` for JSON integer numbers, `bool` for JSON booleans, or
+ `std::vector` types for JSON arrays
+
+ @return copy of the JSON value, converted to type @a ValueType
+
+ @throw std::domain_error in case passed type @a ValueType is incompatible
+ to JSON, thrown by @ref get() const
+
+ @complexity Linear in the size of the JSON value.
+
+ @liveexample{The example below shows several conversions from JSON values
+ to other types. There are a few things to note: (1) Floating-point numbers can
+ be converted to integers\, (2) A JSON array can be converted to a standard
+ `std::vector<short>`\, (3) A JSON object can be converted to C++
+ associative containers such as `std::unordered_map<std::string\,
+ json>`.,operator__ValueType}
+ */
+ template<typename ValueType, typename
+          std::enable_if<
+              not std::is_pointer<ValueType>::value
+              , int>::type = 0>
+ operator ValueType() const
+ {
+     // the implicit conversion is exactly the explicit one: forward to
+     // get<>() const, including any exception it throws
+     return get<ValueType>();
+ }
+
+ /// @}
+
+
+ ////////////////////
+ // element access //
+ ////////////////////
+
+ /// @name element access
+ /// @{
+
+ /*!
+ @brief access specified array element with bounds checking
+
+ Returns a reference to the element at specified location @a idx, with
+ bounds checking.
+
+ @param[in] idx index of the element to access
+
+ @return reference to the element at index @a idx
+
+ @throw std::domain_error if JSON is not an array
+ @throw std::out_of_range if the index @a idx is out of range of the array;
+ that is, `idx >= size()`
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how array elements can be read and
+ written using at.,at__size_type}
+ */
+ reference at(size_type idx)
+ {
+     // positional access exists for arrays only; the range check itself is
+     // performed by the underlying container's at()
+     if (m_type == value_t::array)
+     {
+         return m_value.array->at(idx);
+     }
+
+     throw std::domain_error("cannot use at() with " + type_name());
+ }
+
+ /*!
+ @brief access specified array element with bounds checking
+
+ Returns a const reference to the element at specified location @a idx, with
+ bounds checking.
+
+ @param[in] idx index of the element to access
+
+ @return const reference to the element at index @a idx
+
+ @throw std::domain_error if JSON is not an array
+ @throw std::out_of_range if the index @a idx is out of range of the array;
+ that is, `idx >= size()`
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how array elements can be read using
+ at.,at__size_type_const}
+ */
+ const_reference at(size_type idx) const
+ {
+     // positional access exists for arrays only; the range check itself is
+     // performed by the underlying container's at()
+     if (m_type == value_t::array)
+     {
+         return m_value.array->at(idx);
+     }
+
+     throw std::domain_error("cannot use at() with " + type_name());
+ }
+
+ /*!
+ @brief access specified object element with bounds checking
+
+ Returns a reference to the element at with specified key @a key, with
+ bounds checking.
+
+ @param[in] key key of the element to access
+
+ @return reference to the element at key @a key
+
+ @throw std::domain_error if JSON is not an object
+ @throw std::out_of_range if the key @a key is not stored in the object;
+ that is, `find(key) == end()`
+
+ @complexity Logarithmic in the size of the container.
+
+ @liveexample{The example below shows how object elements can be read and
+ written using at.,at__object_t_key_type}
+ */
+ reference at(const typename object_t::key_type& key)
+ {
+     // keyed access exists for objects only; a missing key is reported by the
+     // underlying container's at()
+     if (m_type == value_t::object)
+     {
+         return m_value.object->at(key);
+     }
+
+     throw std::domain_error("cannot use at() with " + type_name());
+ }
+
+ /*!
+ @brief access specified object element with bounds checking
+
+ Returns a const reference to the element at with specified key @a key, with
+ bounds checking.
+
+ @param[in] key key of the element to access
+
+ @return const reference to the element at key @a key
+
+ @throw std::domain_error if JSON is not an object
+ @throw std::out_of_range if the key @a key is not stored in the object;
+ that is, `find(key) == end()`
+
+ @complexity Logarithmic in the size of the container.
+
+ @liveexample{The example below shows how object elements can be read using
+ at.,at__object_t_key_type_const}
+ */
+ const_reference at(const typename object_t::key_type& key) const
+ {
+     // keyed access exists for objects only; a missing key is reported by the
+     // underlying container's at()
+     if (m_type == value_t::object)
+     {
+         return m_value.object->at(key);
+     }
+
+     throw std::domain_error("cannot use at() with " + type_name());
+ }
+
+ /*!
+ @brief access specified array element
+
+ Returns a reference to the element at specified location @a idx.
+
+ @note If @a idx is beyond the range of the array (i.e., `idx >= size()`),
+ then the array is silently filled up with `null` values to make `idx` a
+ valid reference to the last stored element.
+
+ @param[in] idx index of the element to access
+
+ @return reference to the element at index @a idx
+
+ @throw std::domain_error if JSON is not an array or null
+
+ @complexity Constant if @a idx is in the range of the array. Otherwise
+ linear in `idx - size()`.
+
+ @liveexample{The example below shows how array elements can be read and
+ written using [] operator. Note the addition of `null`
+ values.,operatorarray__size_type}
+ */
+ reference operator[](size_type idx)
+ {
+     // implicitly convert null to an empty array
+     if (m_type == value_t::null)
+     {
+         // allocate before switching the type tag, so that the value is still
+         // tagged null should create() throw
+         m_value.array = create<array_t>();
+         m_type = value_t::array;
+     }
+
+     // [] only works for arrays
+     if (m_type != value_t::array)
+     {
+         throw std::domain_error("cannot use operator[] with " + type_name());
+     }
+
+     // pad with null values in a single insertion so that idx becomes the
+     // last valid index; nothing is added when idx is already in range
+     const auto current_size = m_value.array->size();
+     if (idx >= current_size)
+     {
+         m_value.array->insert(m_value.array->end(),
+                               idx - current_size + 1,
+                               basic_json());
+     }
+
+     return m_value.array->operator[](idx);
+ }
+
+ /*!
+ @brief access specified array element
+
+ Returns a const reference to the element at specified location @a idx.
+
+ @param[in] idx index of the element to access
+
+ @return const reference to the element at index @a idx
+
+ @throw std::domain_error if JSON is not an array
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how array elements can be read using
+ the [] operator.,operatorarray__size_type_const}
+ */
+ const_reference operator[](size_type idx) const
+ {
+     // [] with an index only works for arrays; unlike at(), no range check
+     // is made here before indexing the underlying container
+     if (m_type == value_t::array)
+     {
+         return m_value.array->operator[](idx);
+     }
+
+     throw std::domain_error("cannot use operator[] with " + type_name());
+ }
+
+ /*!
+ @brief access specified object element
+
+ Returns a reference to the element at with specified key @a key.
+
+ @note If @a key is not found in the object, then it is silently added to
+ the object and filled with a `null` value to make `key` a valid reference.
+ In case the value was `null` before, it is converted to an object.
+
+ @param[in] key key of the element to access
+
+ @return reference to the element at key @a key
+
+ @throw std::domain_error if JSON is not an object or null
+
+ @complexity Logarithmic in the size of the container.
+
+ @liveexample{The example below shows how object elements can be read and
+ written using the [] operator.,operatorarray__key_type}
+ */
+ reference operator[](const typename object_t::key_type& key)
+ {
+     // implicitly convert null to an empty object
+     if (m_type == value_t::null)
+     {
+         // allocate before switching the type tag, so that the value is still
+         // tagged null should create() throw
+         m_value.object = create<object_t>();
+         m_type = value_t::object;
+     }
+
+     // [] only works for objects
+     if (m_type != value_t::object)
+     {
+         throw std::domain_error("cannot use operator[] with " + type_name());
+     }
+
+     // the container's operator[] adds the key if it is not present yet
+     return m_value.object->operator[](key);
+ }
+
+ /*!
+ @brief access specified object element (read-only)
+
+ Returns a const reference to the element with specified key @a key. The
+ JSON value is never modified: a key that is not stored is reported with an
+ exception instead of being inserted.
+
+ @param[in] key key of the element to access
+
+ @return const reference to the element at key @a key
+
+ @throw std::domain_error if JSON is not an object
+ @throw std::out_of_range if the key @a key is not stored in the object
+
+ @complexity Logarithmic in the size of the container.
+
+ @liveexample{The example below shows how object elements can be read using
+ the [] operator.,operatorarray__key_type_const}
+ */
+ const_reference operator[](const typename object_t::key_type& key) const
+ {
+     // [] only works for objects
+     if (m_type != value_t::object)
+     {
+         throw std::domain_error("cannot use operator[] with " + type_name());
+     }
+
+     // m_value.object points to a non-const container even in a const member,
+     // so the container's operator[] would insert a missing key into a const
+     // JSON value; at() performs a pure lookup instead
+     return m_value.object->at(key);
+ }
+
+ /*!
+ @brief access specified object element
+
+ Returns a reference to the element at with specified key @a key.
+
+ @note If @a key is not found in the object, then it is silently added to
+ the object and filled with a `null` value to make `key` a valid reference.
+ In case the value was `null` before, it is converted to an object.
+
+ @note This function is required for compatibility reasons with Clang.
+
+ @param[in] key key of the element to access
+
+ @return reference to the element at key @a key
+
+ @throw std::domain_error if JSON is not an object or null
+
+ @complexity Logarithmic in the size of the container.
+
+ @liveexample{The example below shows how object elements can be read and
+ written using the [] operator.,operatorarray__key_type}
+ */
+ template<typename T, std::size_t n>
+ reference operator[](const T (&key)[n])
+ {
+     // implicitly convert null to an empty object, exactly like the
+     // key_type overload of operator[] does
+     if (m_type == value_t::null)
+     {
+         // allocate before switching the type tag, so that the value is still
+         // tagged null should create() throw
+         m_value.object = create<object_t>();
+         m_type = value_t::object;
+     }
+
+     // [] only works for objects
+     if (m_type != value_t::object)
+     {
+         throw std::domain_error("cannot use operator[] with " + type_name());
+     }
+
+     // the container's operator[] adds the key if it is not present yet
+     return m_value.object->operator[](key);
+ }
+
+ /*!
+ @brief access specified object element (read-only)
+
+ Returns a const reference to the element with specified key @a key. The
+ JSON value is never modified: a key that is not stored is reported with an
+ exception instead of being inserted.
+
+ @note This function is required for compatibility reasons with Clang.
+
+ @param[in] key key of the element to access
+
+ @return const reference to the element at key @a key
+
+ @throw std::domain_error if JSON is not an object
+ @throw std::out_of_range if the key @a key is not stored in the object
+
+ @complexity Logarithmic in the size of the container.
+
+ @liveexample{The example below shows how object elements can be read using
+ the [] operator.,operatorarray__key_type_const}
+ */
+ template<typename T, std::size_t n>
+ const_reference operator[](const T (&key)[n]) const
+ {
+     // [] only works for objects
+     if (m_type != value_t::object)
+     {
+         throw std::domain_error("cannot use operator[] with " + type_name());
+     }
+
+     // m_value.object points to a non-const container even in a const member,
+     // so the container's operator[] would insert a missing key into a const
+     // JSON value; at() performs a pure lookup instead
+     return m_value.object->at(key);
+ }
+
+ /*!
+ @brief access the first element
+
+ Returns a reference to the first element in the container. For a JSON
+ container `c`, the expression `c.front()` is equivalent to `*c.begin()`.
+
+ @return In case of a structured type (array or object), a reference to the
+ first element is returned. In case of number, string, or boolean values, a
+ reference to the value is returned.
+
+ @complexity Constant.
+
+ @note Calling `front` on an empty container is undefined.
+
+ @throw std::out_of_range when called on null value
+
+ @liveexample{The following code shows an example for @ref front.,front}
+ */
+ reference front()
+ {
+     // the first element is whatever begin() refers to
+     iterator first = begin();
+     return *first;
+ }
+
+ /*!
+ @copydoc basic_json::front()
+ */
+ const_reference front() const
+ {
+     // the first element is whatever cbegin() refers to
+     const_iterator first = cbegin();
+     return *first;
+ }
+
+ /*!
+ @brief access the last element
+
+ Returns a reference to the last element in the container. For a JSON
+ container `c`, the expression `c.back()` is equivalent to `{ auto tmp =
+ c.end(); --tmp; return *tmp; }`.
+
+ @return In case of a structured type (array or object), a reference to the
+ last element is returned. In case of number, string, or boolean values, a
+ reference to the value is returned.
+
+ @complexity Constant.
+
+ @note Calling `back` on an empty container is undefined.
+
+ @throw std::out_of_range when called on null value.
+
+ @liveexample{The following code shows an example for @ref back.,back}
+ */
+ reference back()
+ {
+     // step one position back from the past-the-end iterator
+     iterator last = end();
+     --last;
+     return *last;
+ }
+
+ /*!
+ @copydoc basic_json::back()
+ */
+ const_reference back() const
+ {
+     // step one position back from the past-the-end iterator
+     const_iterator last = cend();
+     --last;
+     return *last;
+ }
+
+ /*!
+ @brief remove element given an iterator
+
+ Removes the element specified by iterator @a pos. Invalidates iterators and
+ references at or after the point of the erase, including the end()
+ iterator. The iterator @a pos must be valid and dereferenceable. Thus the
+ end() iterator (which is valid, but is not dereferencable) cannot be used
+ as a value for @a pos.
+
+ If called on a primitive type other than null, the resulting JSON value
+ will be `null`.
+
+ @param[in] pos iterator to the element to remove
+ @return Iterator following the last removed element. If the iterator @a pos
+ refers to the last element, the end() iterator is returned.
+
+ @tparam InteratorType an @ref iterator or @ref const_iterator
+
+ @throw std::domain_error if called on a `null` value
+ @throw std::domain_error if called on an iterator which does not belong to
+ the current JSON value
+ @throw std::out_of_range if called on a primitive type with invalid iterator
+ (i.e., any iterator which is not begin())
+
+ @complexity The complexity depends on the type:
+ - objects: amortized constant
+ - arrays: linear in distance between pos and the end of the container
+ - strings: linear in the length of the string
+ - other types: constant
+
+ @liveexample{The example shows the result of erase for different JSON
+ types.,erase__IteratorType}
+ */
+ template <class InteratorType, typename
+           std::enable_if<
+               std::is_same<InteratorType, typename basic_json_t::iterator>::value or
+               std::is_same<InteratorType, typename basic_json_t::const_iterator>::value
+               , int>::type
+           = 0>
+ InteratorType erase(InteratorType pos)
+ {
+     // the iterator must refer to this very JSON value
+     if (pos.m_object != this)
+     {
+         throw std::domain_error("iterator does not fit current value");
+     }
+
+     // start from end(); only the container branches overwrite the position
+     InteratorType result = end();
+
+     if (m_type == value_t::object)
+     {
+         result.m_it.object_iterator = m_value.object->erase(pos.m_it.object_iterator);
+     }
+     else if (m_type == value_t::array)
+     {
+         result.m_it.array_iterator = m_value.array->erase(pos.m_it.array_iterator);
+     }
+     else if (m_type == value_t::string or m_type == value_t::boolean or
+              m_type == value_t::number_integer or m_type == value_t::number_float)
+     {
+         // for a primitive value only the begin position may be erased
+         if (not pos.m_it.primitive_iterator.is_begin())
+         {
+             throw std::out_of_range("iterator out of range");
+         }
+
+         // a string owns separately allocated storage that has to be released
+         if (m_type == value_t::string)
+         {
+             delete m_value.string;
+             m_value.string = nullptr;
+         }
+
+         // erasing the single element turns the value into null
+         m_type = value_t::null;
+     }
+     else
+     {
+         // remaining types cannot be erased from
+         throw std::domain_error("cannot use erase() with " + type_name());
+     }
+
+     return result;
+ }
+
+ /*!
+ @brief remove elements given an iterator range
+
+ Removes the element specified by the range `[first; last)`. Invalidates
+ iterators and references at or after the point of the erase, including the
+ end() iterator. The iterator @a first does not need to be dereferenceable
+ if `first == last`: erasing an empty range is a no-op.
+
+ If called on a primitive type other than null, the resulting JSON value
+ will be `null`.
+
+ @param[in] first iterator to the beginning of the range to remove
+ @param[in] last iterator past the end of the range to remove
+ @return Iterator following the last removed element. If the iterator @a
+ second refers to the last element, the end() iterator is returned.
+
+ @tparam InteratorType an @ref iterator or @ref const_iterator
+
+ @throw std::domain_error if called on a `null` value
+ @throw std::domain_error if called on iterators which do not belong to
+ the current JSON value
+ @throw std::out_of_range if called on a primitive type with invalid iterators
+ (i.e., if `first != begin()` or `last != end()`)
+
+ @complexity The complexity depends on the type:
+ - objects: `log(size()) + std::distance(first, last)`
+ - arrays: linear in the distance between @a first and @a last, plus linear
+ in the distance between @a last and end of the container
+ - strings: linear in the length of the string
+ - other types: constant
+
+ @liveexample{The example shows the result of erase for different JSON
+ types.,erase__IteratorType_IteratorType}
+ */
+ template <class InteratorType, typename
+           std::enable_if<
+               std::is_same<InteratorType, typename basic_json_t::iterator>::value or
+               std::is_same<InteratorType, typename basic_json_t::const_iterator>::value
+               , int>::type
+           = 0>
+ InteratorType erase(InteratorType first, InteratorType last)
+ {
+     // both iterators must refer to this very JSON value
+     if (not (first.m_object == this and last.m_object == this))
+     {
+         throw std::domain_error("iterators do not fit current value");
+     }
+
+     // start from end(); only the container branches overwrite the position
+     InteratorType result = end();
+
+     if (m_type == value_t::object)
+     {
+         result.m_it.object_iterator = m_value.object->erase(first.m_it.object_iterator,
+                                       last.m_it.object_iterator);
+     }
+     else if (m_type == value_t::array)
+     {
+         result.m_it.array_iterator = m_value.array->erase(first.m_it.array_iterator,
+                                      last.m_it.array_iterator);
+     }
+     else if (m_type == value_t::string or m_type == value_t::boolean or
+              m_type == value_t::number_integer or m_type == value_t::number_float)
+     {
+         // for a primitive value only the full range [begin, end) may be erased
+         const bool covers_whole_value = first.m_it.primitive_iterator.is_begin() and
+                                         last.m_it.primitive_iterator.is_end();
+         if (not covers_whole_value)
+         {
+             throw std::out_of_range("iterators out of range");
+         }
+
+         // a string owns separately allocated storage that has to be released
+         if (m_type == value_t::string)
+         {
+             delete m_value.string;
+             m_value.string = nullptr;
+         }
+
+         // erasing the single element turns the value into null
+         m_type = value_t::null;
+     }
+     else
+     {
+         // remaining types cannot be erased from
+         throw std::domain_error("cannot use erase with " + type_name());
+     }
+
+     return result;
+ }
+
+ /*!
+ @brief remove element from a JSON object given a key
+
+ Removes elements from a JSON object with the key value @a key.
+
+ @param[in] key value of the elements to remove
+
+ @return Number of elements removed. If ObjectType is the default `std::map`
+ type, the return value will always be `0` (@a key was not found) or `1` (@a
+ key was found).
+
+ @throw std::domain_error when called on a type other than JSON object
+
+ @complexity `log(size()) + count(key)`
+
+ @liveexample{The example shows the effect of erase.,erase__key_type}
+ */
+ size_type erase(const typename object_t::key_type& key)
+ {
+     // removal by key exists for objects only; the count of removed elements
+     // comes straight from the underlying container
+     if (m_type == value_t::object)
+     {
+         return m_value.object->erase(key);
+     }
+
+     throw std::domain_error("cannot use erase() with " + type_name());
+ }
+
+ /*!
+ @brief remove element from a JSON array given an index
+
+ Removes element from a JSON array at the index @a idx.
+
+ @param[in] idx index of the element to remove
+
+ @throw std::domain_error when called on a type other than JSON array
+ @throw std::out_of_range when `idx >= size()`
+
+ @complexity Linear in distance between @a idx and the end of the container.
+
+ @liveexample{The example shows the effect of erase.,erase__size_type}
+ */
+ void erase(const size_type idx)
+ {
+     // removal by index exists for arrays only
+     if (m_type != value_t::array)
+     {
+         throw std::domain_error("cannot use erase() with " + type_name());
+     }
+
+     // reject indices at or beyond the current size
+     if (not (idx < size()))
+     {
+         throw std::out_of_range("index out of range");
+     }
+
+     // translate the index into an iterator of the underlying container
+     const auto target = m_value.array->begin() + static_cast<difference_type>(idx);
+     m_value.array->erase(target);
+ }
+
+ /*!
+ @brief find an element in a JSON object
+
+ Finds an element in a JSON object with key equivalent to @a key. If the
+ element is not found or the JSON value is not an object, end() is returned.
+
+ @param[in] key key value of the element to search for
+
+ @return Iterator to an element with key equivalent to @a key. If no such
+ element is found, past-the-end (see end()) iterator is returned.
+
+ @complexity Logarithmic in the size of the JSON object.
+
+ @liveexample{The example shows how find is used.,find__key_type}
+ */
+ iterator find(typename object_t::key_type key)
+ {
+     // default answer is end(); it is returned unchanged for non-objects
+     iterator hit = end();
+
+     // for objects, take over the position reported by the container lookup
+     if (value_t::object == m_type)
+     {
+         hit.m_it.object_iterator = m_value.object->find(key);
+     }
+
+     return hit;
+ }
+
+ /*!
+ @brief find an element in a JSON object
+ @copydoc find(typename object_t::key_type)
+ */
+ const_iterator find(typename object_t::key_type key) const
+ {
+     // default answer is cend(); it is returned unchanged for non-objects
+     const_iterator hit = cend();
+
+     // for objects, take over the position reported by the container lookup
+     if (value_t::object == m_type)
+     {
+         hit.m_it.object_iterator = m_value.object->find(key);
+     }
+
+     return hit;
+ }
+
+ /*!
+ @brief returns the number of occurrences of a key in a JSON object
+
+ Returns the number of elements with key @a key. If ObjectType is the
+ default `std::map` type, the return value will always be `0` (@a key was
+ not found) or `1` (@a key was found).
+
+ @param[in] key key value of the element to count
+
+ @return Number of elements with key @a key. If the JSON value is not an
+ object, the return value will be `0`.
+
+ @complexity Logarithmic in the size of the JSON object.
+
+ @liveexample{The example shows how count is used.,count}
+ */
+ size_type count(typename object_t::key_type key) const
+ {
+     // only objects have keys; the count comes from the underlying container
+     if (m_type == value_t::object)
+     {
+         return m_value.object->count(key);
+     }
+
+     // every non-object value contains no keys at all
+     return 0;
+ }
+
+ /// @}
+
+
+ ///////////////
+ // iterators //
+ ///////////////
+
+ /// @name iterators
+ /// @{
+
+ /*!
+ @brief returns an iterator to the first element
+
+ Returns an iterator to the first element.
+
+ @image html range-begin-end.svg "Illustration from cppreference.com"
+
+ @return iterator to the first element
+
+ @complexity Constant.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+
+ @liveexample{The following code shows an example for @ref begin.,begin}
+ */
+ iterator begin()
+ {
+     // create an iterator bound to this value and move it to the start
+     iterator first(this);
+     first.set_begin();
+     return first;
+ }
+
+ /*!
+ @copydoc basic_json::cbegin()
+ */
+ const_iterator begin() const
+ {
+     // the const overload shares its implementation with cbegin()
+     return this->cbegin();
+ }
+
+ /*!
+ @brief returns a const iterator to the first element
+
+ Returns a const iterator to the first element.
+
+ @image html range-begin-end.svg "Illustration from cppreference.com"
+
+ @return const iterator to the first element
+
+ @complexity Constant.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+ - Has the semantics of `const_cast<const basic_json&>(*this).begin()`.
+
+ @liveexample{The following code shows an example for @ref cbegin.,cbegin}
+ */
+ const_iterator cbegin() const
+ {
+     // bind a const iterator to this value, then position it via set_begin()
+     const_iterator first_position(this);
+     first_position.set_begin();
+     return first_position;
+ }
+
+ /*!
+ @brief returns an iterator to one past the last element
+
+ Returns an iterator to one past the last element.
+
+ @image html range-begin-end.svg "Illustration from cppreference.com"
+
+ @return iterator one past the last element
+
+ @complexity Constant.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+
+ @liveexample{The following code shows an example for @ref end.,end}
+ */
+ iterator end()
+ {
+     // bind an iterator to this value, then position it via set_end()
+     iterator past_the_end(this);
+     past_the_end.set_end();
+     return past_the_end;
+ }
+
+ /*!
+ @copydoc basic_json::cend()
+ */
+ const_iterator end() const
+ {
+     // the const overload shares its implementation with cend()
+     return this->cend();
+ }
+
+ /*!
+ @brief returns a const iterator to one past the last element
+
+ Returns a const iterator to one past the last element.
+
+ @image html range-begin-end.svg "Illustration from cppreference.com"
+
+ @return const iterator one past the last element
+
+ @complexity Constant.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+ - Has the semantics of `const_cast<const basic_json&>(*this).end()`.
+
+ @liveexample{The following code shows an example for @ref cend.,cend}
+ */
+ const_iterator cend() const
+ {
+     // bind a const iterator to this value, then position it via set_end()
+     const_iterator past_the_end(this);
+     past_the_end.set_end();
+     return past_the_end;
+ }
+
+ /*!
+ @brief returns an iterator to the reverse-beginning
+
+ Returns an iterator to the reverse-beginning; that is, the last element.
+
+ @image html range-rbegin-rend.svg "Illustration from cppreference.com"
+
+ @complexity Constant.
+
+ @requirement This function satisfies the ReversibleContainer requirements:
+ - The complexity is constant.
+ - Has the semantics of `reverse_iterator(end())`.
+
+ @liveexample{The following code shows an example for @ref rbegin.,rbegin}
+ */
+ reverse_iterator rbegin()
+ {
+     // reverse iteration starts from the forward past-the-end position
+     iterator forward_end = end();
+     return reverse_iterator(forward_end);
+ }
+
+ /*!
+ @copydoc basic_json::crbegin()
+ */
+ const_reverse_iterator rbegin() const
+ {
+     // the const overload shares its implementation with crbegin()
+     return this->crbegin();
+ }
+
+ /*!
+ @brief returns an iterator to the reverse-end
+
+ Returns an iterator to the reverse-end; that is, one before the first
+ element.
+
+ @image html range-rbegin-rend.svg "Illustration from cppreference.com"
+
+ @complexity Constant.
+
+ @requirement This function satisfies the ReversibleContainer requirements:
+ - The complexity is constant.
+ - Has the semantics of `reverse_iterator(begin())`.
+
+ @liveexample{The following code shows an example for @ref rend.,rend}
+ */
+ reverse_iterator rend()
+ {
+     // reverse iteration stops at the forward begin position
+     iterator forward_begin = begin();
+     return reverse_iterator(forward_begin);
+ }
+
+ /*!
+ @copydoc basic_json::crend()
+ */
+ const_reverse_iterator rend() const
+ {
+     // the const overload shares its implementation with crend()
+     return this->crend();
+ }
+
+ /*!
+ @brief returns a const reverse iterator to the last element
+
+ Returns a const iterator to the reverse-beginning; that is, the last
+ element.
+
+ @image html range-rbegin-rend.svg "Illustration from cppreference.com"
+
+ @complexity Constant.
+
+ @requirement This function satisfies the ReversibleContainer requirements:
+ - The complexity is constant.
+ - Has the semantics of `const_cast<const basic_json&>(*this).rbegin()`.
+
+ @liveexample{The following code shows an example for @ref crbegin.,crbegin}
+ */
+ const_reverse_iterator crbegin() const
+ {
+     // reverse iteration starts from the forward past-the-end position
+     const_iterator forward_end = cend();
+     return const_reverse_iterator(forward_end);
+ }
+
+ /*!
+ @brief returns a const reverse iterator to one before the first
+
+ Returns a const reverse iterator to the reverse-end; that is, one before
+ the first element.
+
+ @image html range-rbegin-rend.svg "Illustration from cppreference.com"
+
+ @complexity Constant.
+
+ @requirement This function satisfies the ReversibleContainer requirements:
+ - The complexity is constant.
+ - Has the semantics of `const_cast<const basic_json&>(*this).rend()`.
+
+ @liveexample{The following code shows an example for @ref crend.,crend}
+ */
+ const_reverse_iterator crend() const
+ {
+     // reverse iteration stops at the forward begin position
+     const_iterator forward_begin = cbegin();
+     return const_reverse_iterator(forward_begin);
+ }
+
+ /// @}
+
+
+ //////////////
+ // capacity //
+ //////////////
+
+ /// @name capacity
+ /// @{
+
+ /*!
+ @brief checks whether the container is empty
+
+ Checks if a JSON value has no elements.
+
+ @return The return value depends on the different types and is
+ defined as follows:
+ Value type | return value
+ ----------- | -------------
+ null | @c true
+ boolean | @c false
+ string | @c false
+ number | @c false
+ object | result of function object_t::empty()
+ array | result of function array_t::empty()
+
+ @complexity Constant, as long as @ref array_t and @ref object_t satisfy the
+ Container concept; that is, their empty() functions have
+ constant complexity.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+ - Has the semantics of `begin() == end()`.
+
+ @liveexample{The following code uses @ref empty to check if a @ref json
+ object contains any elements.,empty}
+ */
+ bool empty() const noexcept
+ {
+     const auto kind = m_type;
+
+     // a null value holds nothing at all
+     if (kind == value_t::null)
+     {
+         return true;
+     }
+
+     // arrays and objects defer to the underlying container
+     if (kind == value_t::array)
+     {
+         return m_value.array->empty();
+     }
+     if (kind == value_t::object)
+     {
+         return m_value.object->empty();
+     }
+
+     // all other types are nonempty
+     return false;
+ }
+
+ /*!
+ @brief returns the number of elements
+
+ Returns the number of elements in a JSON value.
+
+ @return The return value depends on the different types and is
+ defined as follows:
+ Value type | return value
+ ----------- | -------------
+ null | @c 0
+ boolean | @c 1
+ string | @c 1
+ number | @c 1
+ object | result of function object_t::size()
+ array | result of function array_t::size()
+
+ @complexity Constant, as long as @ref array_t and @ref object_t satisfy the
+ Container concept; that is, their size() functions have
+ constant complexity.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+ - Has the semantics of `std::distance(begin(), end())`.
+
+ @liveexample{The following code calls @ref size on the different value
+ types.,size}
+ */
+ size_type size() const noexcept
+ {
+     const auto kind = m_type;
+
+     // a null value contains no elements
+     if (kind == value_t::null)
+     {
+         return 0;
+     }
+
+     // arrays and objects report the size of the underlying container
+     if (kind == value_t::array)
+     {
+         return m_value.array->size();
+     }
+     if (kind == value_t::object)
+     {
+         return m_value.object->size();
+     }
+
+     // all other types have size 1
+     return 1;
+ }
+
+ /*!
+ @brief returns the maximum possible number of elements
+
+ Returns the maximum number of elements a JSON value is able to hold due to
+ system or library implementation limitations, i.e. `std::distance(begin(),
+ end())` for the JSON value.
+
+ @return The return value depends on the different types and is
+ defined as follows:
+ Value type | return value
+ ----------- | -------------
+ null | @c 0 (same as size())
+ boolean | @c 1 (same as size())
+ string | @c 1 (same as size())
+ number | @c 1 (same as size())
+ object | result of function object_t::max_size()
+ array | result of function array_t::max_size()
+
+ @complexity Constant, as long as @ref array_t and @ref object_t satisfy the
+ Container concept; that is, their max_size() functions have
+ constant complexity.
+
+ @requirement This function satisfies the Container requirements:
+ - The complexity is constant.
+ - Has the semantics of returning `b.size()` where `b` is the largest
+ possible JSON value.
+
+ @liveexample{The following code calls @ref max_size on the different value
+ types. Note the output is implementation specific.,max_size}
+ */
+ size_type max_size() const noexcept
+ {
+     // arrays and objects are limited by the underlying container
+     if (m_type == value_t::array)
+     {
+         return m_value.array->max_size();
+     }
+     if (m_type == value_t::object)
+     {
+         return m_value.object->max_size();
+     }
+
+     // all other types have max_size() == size()
+     return size();
+ }
+
+ /// @}
+
+
+ ///////////////
+ // modifiers //
+ ///////////////
+
+ /// @name modifiers
+ /// @{
+
+ /*!
+ @brief clears the contents
+
+ Clears the content of a JSON value and resets it to the default value as
+ if @ref basic_json(value_t) would have been called:
+
+ Value type | initial value
+ ----------- | -------------
+ null | `null`
+ boolean | `false`
+ string | `""`
+ number | `0`
+ object | `{}`
+ array | `[]`
+
+ @note Floating-point numbers are set to `0.0` which will be serialized to
+ `0`. The value type remains @ref number_float_t.
+
+ @complexity Linear in the size of the JSON value.
+
+ @liveexample{The example below shows the effect of @ref clear to different
+ JSON types.,clear}
+ */
+ void clear() noexcept
+ {
+     const auto kind = m_type;
+
+     // null and discarded values have nothing to reset
+     if (kind == value_t::null or kind == value_t::discarded)
+     {
+         return;
+     }
+
+     // scalar types are reset to their zero value; the type itself is kept
+     if (kind == value_t::number_integer)
+     {
+         m_value.number_integer = 0;
+     }
+     else if (kind == value_t::number_float)
+     {
+         m_value.number_float = 0.0;
+     }
+     else if (kind == value_t::boolean)
+     {
+         m_value.boolean = false;
+     }
+     // strings, arrays, and objects are emptied through their own clear()
+     else if (kind == value_t::string)
+     {
+         m_value.string->clear();
+     }
+     else if (kind == value_t::array)
+     {
+         m_value.array->clear();
+     }
+     else if (kind == value_t::object)
+     {
+         m_value.object->clear();
+     }
+ }
+
+ /*!
+ @brief add an object to an array
+
+ Appends the given element @a value to the end of the JSON value. If the
+ function is called on a JSON null value, an empty array is created before
+ appending @a value.
+
+ @param value the value to add to the JSON array
+
+ @throw std::domain_error when called on a type other than JSON array or null
+
+ @complexity Amortized constant.
+
+ @liveexample{The example shows how `push_back` and `+=` can be used to add
+ elements to a JSON array. Note how the `null` value was silently converted
+ to a JSON array.,push_back}
+ */
+ void push_back(basic_json&& value)
+ {
+     if (m_type == value_t::null)
+     {
+         // a null value is turned into an empty array first
+         m_type = value_t::array;
+         m_value = value_t::array;
+     }
+     else if (m_type != value_t::array)
+     {
+         // every type other than null or array is rejected
+         throw std::domain_error("cannot use push_back() with " + type_name());
+     }
+
+     // pass the element on to the array as an rvalue
+     m_value.array->push_back(std::move(value));
+
+     // mark the moved-from argument as a null value
+     value.m_type = value_t::null;
+ }
+
+ /*!
+ @brief add an object to an array
+ @copydoc push_back(basic_json&&)
+ */
+ reference operator+=(basic_json&& value)
+ {
+     // same effect as push_back(); the container itself is returned
+     this->push_back(std::move(value));
+     return *this;
+ }
+
+ /*!
+ @brief add an object to an array
+ @copydoc push_back(basic_json&&)
+ */
+ void push_back(const basic_json& value)
+ {
+     if (m_type == value_t::null)
+     {
+         // a null value is turned into an empty array first
+         m_type = value_t::array;
+         m_value = value_t::array;
+     }
+     else if (m_type != value_t::array)
+     {
+         // every type other than null or array is rejected
+         throw std::domain_error("cannot use push_back() with " + type_name());
+     }
+
+     // append the element to the array
+     m_value.array->push_back(value);
+ }
+
+ /*!
+ @brief add an object to an array
+ @copydoc push_back(basic_json&&)
+ */
+ reference operator+=(const basic_json& value)
+ {
+     // same effect as push_back(); the container itself is returned
+     this->push_back(value);
+     return *this;
+ }
+
+ /*!
+ @brief add an object to an object
+
+ Inserts the given element @a value to the JSON object. If the function is
+ called on a JSON null value, an empty object is created before inserting @a
+ value.
+
+ @param[in] value the value to add to the JSON object
+
+ @throw std::domain_error when called on a type other than JSON object or
+ null
+
+ @complexity Logarithmic in the size of the container, O(log(`size()`)).
+
+ @liveexample{The example shows how `push_back` and `+=` can be used to add
+ elements to a JSON object. Note how the `null` value was silently converted
+ to a JSON object.,push_back__object_t__value}
+ */
+ void push_back(const typename object_t::value_type& value)
+ {
+     if (m_type == value_t::null)
+     {
+         // a null value is turned into an empty object first
+         m_type = value_t::object;
+         m_value = value_t::object;
+     }
+     else if (m_type != value_t::object)
+     {
+         // every type other than null or object is rejected
+         throw std::domain_error("cannot use push_back() with " + type_name());
+     }
+
+     // insert the key/value pair into the object
+     m_value.object->insert(value);
+ }
+
+ /*!
+ @brief add an object to an object
+ @copydoc push_back(const typename object_t::value_type&)
+ */
+ reference operator+=(const typename object_t::value_type& value)
+ {
+     // insert the key/value pair; push_back() converts a null value into an
+     // object and throws std::domain_error for any other non-object type
+     push_back(value);
+
+     // return the container itself, like the other operator+= overloads;
+     // returning operator[](value.first) would hand back the inserted
+     // element, so chaining such as (j += a) += b would modify that element
+     // instead of j
+     return *this;
+ }
+
+ /*!
+ @brief inserts element
+
+ Inserts element @a value before iterator @a pos.
+
+ @param[in] pos iterator before which the content will be inserted; may be
+ the end() iterator
+ @param[in] value element to insert
+ @return iterator pointing to the inserted @a value.
+
+ @throw std::domain_error if called on JSON values other than arrays
+ @throw std::domain_error if @a pos is not an iterator of *this
+
+ @complexity Constant plus linear in the distance between pos and end of the
+ container.
+
+ @liveexample{The example shows how insert is used.,insert}
+ */
+ iterator insert(const_iterator pos, const basic_json& value)
+ {
+     // only arrays support positional insertion
+     const bool is_array = (m_type == value_t::array);
+     if (not is_array)
+     {
+         throw std::domain_error("cannot use insert() with " + type_name());
+     }
+
+     // pos must have been obtained from this very JSON value
+     if (this != pos.m_object)
+     {
+         throw std::domain_error("iterator does not fit current value");
+     }
+
+     // wrap the position returned by the underlying array in a JSON iterator
+     iterator inserted(this);
+     inserted.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, value);
+     return inserted;
+ }
+
+ /*!
+ @brief inserts element
+ @copydoc insert(const_iterator, const basic_json&)
+ */
+ iterator insert(const_iterator pos, basic_json&& value)
+ {
+     // a named rvalue reference is an lvalue, so this call selects the
+     // const basic_json& overload and the element is copied
+     return this->insert(pos, value);
+ }
+
+ /*!
+ @brief inserts elements
+
+ Inserts @a count copies of @a value before iterator @a pos.
+
+ @param[in] pos iterator before which the content will be inserted; may be
+ the end() iterator
+ @param[in] count number of copies of @a value to insert
+ @param[in] value element to insert
+ @return iterator pointing to the first element inserted, or @a pos if
+ `count==0`
+
+ @throw std::domain_error if called on JSON values other than arrays
+ @throw std::domain_error if @a pos is not an iterator of *this
+
+ @complexity Linear in @a count plus linear in the distance between @a pos
+ and end of the container.
+
+ @liveexample{The example shows how insert is used.,insert__count}
+ */
+ iterator insert(const_iterator pos, size_type count, const basic_json& value)
+ {
+     // only arrays support positional insertion
+     const bool is_array = (m_type == value_t::array);
+     if (not is_array)
+     {
+         throw std::domain_error("cannot use insert() with " + type_name());
+     }
+
+     // pos must have been obtained from this very JSON value
+     if (this != pos.m_object)
+     {
+         throw std::domain_error("iterator does not fit current value");
+     }
+
+     // wrap the position returned by the underlying array in a JSON iterator
+     iterator inserted(this);
+     inserted.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, count, value);
+     return inserted;
+ }
+
+ /*!
+ @brief inserts elements
+
+ Inserts elements from range `[first, last)` before iterator @a pos.
+
+ @param[in] pos iterator before which the content will be inserted; may be
+ the end() iterator
+ @param[in] first begin of the range of elements to insert
+ @param[in] last end of the range of elements to insert
+
+ @throw std::domain_error if called on JSON values other than arrays
+ @throw std::domain_error if @a pos is not an iterator of *this
+ @throw std::domain_error if @a first and @a last do not belong to the same
+ JSON value
+ @throw std::domain_error if @a first or @a last are iterators into
+ container for which insert is called
+ @return iterator pointing to the first element inserted, or @a pos if
+ `first==last`
+
+ @complexity Linear in `std::distance(first, last)` plus linear in the
+ distance between @a pos and end of the container.
+
+ @liveexample{The example shows how insert is used.,insert__range}
+ */
+ iterator insert(const_iterator pos, const_iterator first, const_iterator last)
+ {
+     // only arrays support positional insertion
+     const bool is_array = (m_type == value_t::array);
+     if (not is_array)
+     {
+         throw std::domain_error("cannot use insert() with " + type_name());
+     }
+
+     // pos must have been obtained from this very JSON value
+     if (this != pos.m_object)
+     {
+         throw std::domain_error("iterator does not fit current value");
+     }
+
+     // both ends of the source range must come from one JSON value ...
+     if (last.m_object != first.m_object)
+     {
+         throw std::domain_error("iterators does not fit");
+     }
+
+     // ... and that value must not be the insertion target itself
+     if (this == first.m_object or this == last.m_object)
+     {
+         throw std::domain_error("passed iterators may not belong to container");
+     }
+
+     // wrap the position returned by the underlying array in a JSON iterator
+     iterator inserted(this);
+     inserted.m_it.array_iterator = m_value.array->insert(
+         pos.m_it.array_iterator, first.m_it.array_iterator, last.m_it.array_iterator);
+     return inserted;
+ }
+
+ /*!
+ @brief inserts elements
+
+ Inserts elements from initializer list @a ilist before iterator @a pos.
+
+ @param[in] pos iterator before which the content will be inserted; may be
+ the end() iterator
+ @param[in] ilist initializer list to insert the values from
+
+ @throw std::domain_error if called on JSON values other than arrays
+ @throw std::domain_error if @a pos is not an iterator of *this
+ @return iterator pointing to the first element inserted, or @a pos if
+ `ilist` is empty
+
+ @complexity Linear in `ilist.size()` plus linear in the distance between @a
+ pos and end of the container.
+
+ @liveexample{The example shows how insert is used.,insert__ilist}
+ */
+ iterator insert(const_iterator pos, std::initializer_list<basic_json> ilist)
+ {
+     // only arrays support positional insertion
+     const bool is_array = (m_type == value_t::array);
+     if (not is_array)
+     {
+         throw std::domain_error("cannot use insert() with " + type_name());
+     }
+
+     // pos must have been obtained from this very JSON value
+     if (this != pos.m_object)
+     {
+         throw std::domain_error("iterator does not fit current value");
+     }
+
+     // wrap the position returned by the underlying array in a JSON iterator
+     iterator inserted(this);
+     inserted.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, ilist);
+     return inserted;
+ }
+
+ /*!
+ @brief exchanges the values
+
+ Exchanges the contents of the JSON value with those of @a other. Does not
+ invoke any move, copy, or swap operations on individual elements. All
+ iterators and references remain valid. The past-the-end iterator is
+ invalidated.
+
+ @param[in,out] other JSON value to exchange the contents with
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how JSON arrays can be
+ swapped.,swap__reference}
+ */
+ void swap(reference other) noexcept (
+     std::is_nothrow_move_constructible<value_t>::value and
+     std::is_nothrow_move_assignable<value_t>::value and
+     std::is_nothrow_move_constructible<json_value>::value and
+     std::is_nothrow_move_assignable<json_value>::value
+ )
+ {
+     // exchange the stored payload and the type tag of the two values; the
+     // two exchanges are independent of each other
+     std::swap(m_value, other.m_value);
+     std::swap(m_type, other.m_type);
+ }
+
+ /*!
+ @brief exchanges the values
+
+ Exchanges the contents of a JSON array with those of @a other. Does not
+ invoke any move, copy, or swap operations on individual elements. All
+ iterators and references remain valid. The past-the-end iterator is
+ invalidated.
+
+ @param[in,out] other array to exchange the contents with
+
+ @throw std::domain_error when JSON value is not an array
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how JSON values can be
+ swapped.,swap__array_t}
+ */
+ void swap(array_t& other)
+ {
+     // exchanging with an array only makes sense if an array is stored
+     const bool is_array = (m_type == value_t::array);
+     if (not is_array)
+     {
+         throw std::domain_error("cannot use swap() with " + type_name());
+     }
+
+     // exchange the stored array with the caller's array
+     array_t& own_array = *(m_value.array);
+     std::swap(own_array, other);
+ }
+
+ /*!
+ @brief exchanges the values
+
+ Exchanges the contents of a JSON object with those of @a other. Does not
+ invoke any move, copy, or swap operations on individual elements. All
+ iterators and references remain valid. The past-the-end iterator is
+ invalidated.
+
+ @param[in,out] other object to exchange the contents with
+
+ @throw std::domain_error when JSON value is not an object
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how JSON values can be
+ swapped.,swap__object_t}
+ */
+ void swap(object_t& other)
+ {
+     // exchanging with an object only makes sense if an object is stored
+     const bool is_object = (m_type == value_t::object);
+     if (not is_object)
+     {
+         throw std::domain_error("cannot use swap() with " + type_name());
+     }
+
+     // exchange the stored object with the caller's object
+     object_t& own_object = *(m_value.object);
+     std::swap(own_object, other);
+ }
+
+ /*!
+ @brief exchanges the values
+
+ Exchanges the contents of a JSON string with those of @a other. Does not
+ invoke any move, copy, or swap operations on individual elements. All
+ iterators and references remain valid. The past-the-end iterator is
+ invalidated.
+
+ @param[in,out] other string to exchange the contents with
+
+ @throw std::domain_error when JSON value is not a string
+
+ @complexity Constant.
+
+ @liveexample{The example below shows how JSON values can be
+ swapped.,swap__string_t}
+ */
+ void swap(string_t& other)
+ {
+     // exchanging with a string only makes sense if a string is stored
+     const bool is_string = (m_type == value_t::string);
+     if (not is_string)
+     {
+         throw std::domain_error("cannot use swap() with " + type_name());
+     }
+
+     // exchange the stored string with the caller's string
+     string_t& own_string = *(m_value.string);
+     std::swap(own_string, other);
+ }
+
+ /// @}
+
+
+ //////////////////////////////////////////
+ // lexicographical comparison operators //
+ //////////////////////////////////////////
+
+ /// @name lexicographical comparison operators
+ /// @{
+
+ private:
+ /*!
+ @brief comparison operator for JSON types
+
+ Returns an ordering that is similar to Python:
+ - order: null < boolean < number < object < array < string
+ - furthermore, each type is not smaller than itself
+ */
+ friend bool operator<(const value_t lhs, const value_t rhs)
+ {
+     // rank of each type, looked up by the numeric value of the enumerator
+     static constexpr std::array<uint8_t, 7> rank = {{
+             0, // null
+             3, // object
+             4, // array
+             5, // string
+             1, // boolean
+             2, // integer
+             2  // float
+         }
+     };
+
+     // discarded values are not comparable (and have no entry in the table)
+     const bool any_discarded = (lhs == value_t::discarded) or (rhs == value_t::discarded);
+     if (any_discarded)
+     {
+         return false;
+     }
+
+     const auto lhs_rank = rank[static_cast<std::size_t>(lhs)];
+     const auto rhs_rank = rank[static_cast<std::size_t>(rhs)];
+     return lhs_rank < rhs_rank;
+ }
+
+ public:
+ /*!
+ @brief comparison: equal
+
+ Compares two JSON values for equality according to the following rules:
+ - Two JSON values are equal if (1) they are from the same type and (2)
+ their stored values are the same.
+ - Integer and floating-point numbers are automatically converted before
+ comparison. Floating-point numbers are compared indirectly: two
+ floating-point numbers `f1` and `f2` are considered equal if neither
+ `f1 > f2` nor `f2 > f1` holds.
+ - Two JSON null values are equal.
+
+ @param[in] lhs first JSON value to consider
+ @param[in] rhs second JSON value to consider
+ @return whether the values @a lhs and @a rhs are equal
+
+ @complexity Linear.
+
+ @liveexample{The example demonstrates comparing several JSON
+ types.,operator__equal}
+ */
+ friend bool operator==(const_reference lhs, const_reference rhs) noexcept
+ {
+     const auto left_kind = lhs.type();
+     const auto right_kind = rhs.type();
+
+     // mixed integer/float pairs are compared as floating-point numbers
+     if (left_kind == value_t::number_integer and right_kind == value_t::number_float)
+     {
+         return approx(static_cast<number_float_t>(lhs.m_value.number_integer),
+                       rhs.m_value.number_float);
+     }
+     if (left_kind == value_t::number_float and right_kind == value_t::number_integer)
+     {
+         return approx(lhs.m_value.number_float,
+                       static_cast<number_float_t>(rhs.m_value.number_integer));
+     }
+
+     // any other pair of differing types is unequal
+     if (left_kind != right_kind)
+     {
+         return false;
+     }
+
+     // same type on both sides: compare the stored values
+     switch (left_kind)
+     {
+         case (value_t::null):
+             return true;
+         case (value_t::boolean):
+             return lhs.m_value.boolean == rhs.m_value.boolean;
+         case (value_t::number_integer):
+             return lhs.m_value.number_integer == rhs.m_value.number_integer;
+         case (value_t::number_float):
+             return approx(lhs.m_value.number_float, rhs.m_value.number_float);
+         case (value_t::string):
+             return *lhs.m_value.string == *rhs.m_value.string;
+         case (value_t::array):
+             return *lhs.m_value.array == *rhs.m_value.array;
+         case (value_t::object):
+             return *lhs.m_value.object == *rhs.m_value.object;
+         case (value_t::discarded):
+             // two discarded values are never reported as equal
+             return false;
+     }
+
+     // fallback for a type value that the switch does not cover
+     return false;
+ }
+
+ /*!
+ @brief comparison: equal
+
+ The function compares the given JSON value against a null pointer. As the
+ null pointer can be used to initialize a JSON value to null, a comparison
+ of JSON value @a v with a null pointer should be equivalent to call
+ `v.is_null()`.
+
+ @param[in] v JSON value to consider
+ @return whether @a v is null
+
+ @complexity Constant.
+
+ @liveexample{The example compares several JSON types to the null pointer.
+ ,operator__equal__nullptr_t}
+ */
+ friend bool operator==(const_reference v, std::nullptr_t) noexcept
+ {
+     // comparing against nullptr is answered by is_null()
+     const bool holds_null = v.is_null();
+     return holds_null;
+ }
+
+ /*!
+ @brief comparison: equal
+ @copydoc operator==(const_reference, std::nullptr_t)
+ */
+ friend bool operator==(std::nullptr_t, const_reference v) noexcept
+ {
+     // comparing against nullptr is answered by is_null()
+     const bool holds_null = v.is_null();
+     return holds_null;
+ }
+
+ /*!
+ @brief comparison: not equal
+
+ Compares two JSON values for inequality by calculating `not (lhs == rhs)`.
+
+ @param[in] lhs first JSON value to consider
+ @param[in] rhs second JSON value to consider
+ @return whether the values @a lhs and @a rhs are not equal
+
+ @complexity Linear.
+
+ @liveexample{The example demonstrates comparing several JSON
+ types.,operator__notequal}
+ */
+ friend bool operator!=(const_reference lhs, const_reference rhs) noexcept
+ {
+     // inequality is defined as the negation of operator==
+     const bool values_equal = (lhs == rhs);
+     return not values_equal;
+ }
+
+ /*!
+ @brief comparison: not equal
+
+ The function compares the given JSON value against a null pointer. As the
+ null pointer can be used to initialize a JSON value to null, a comparison
+ of JSON value @a v with a null pointer should be equivalent to call
+ `not v.is_null()`.
+
+ @param[in] v JSON value to consider
+ @return whether @a v is not null
+
+ @complexity Constant.
+
+ @liveexample{The example compares several JSON types to the null pointer.
+ ,operator__notequal__nullptr_t}
+ */
+ friend bool operator!=(const_reference v, std::nullptr_t) noexcept
+ {
+     // differs from nullptr exactly when is_null() reports false
+     const bool holds_null = v.is_null();
+     return not holds_null;
+ }
+
+ /*!
+ @brief comparison: not equal
+ @copydoc operator!=(const_reference, std::nullptr_t)
+ */
+ friend bool operator!=(std::nullptr_t, const_reference v) noexcept
+ {
+     // differs from nullptr exactly when is_null() reports false
+     const bool holds_null = v.is_null();
+     return not holds_null;
+ }
+
+ /*!
+ @brief comparison: less than
+
+ Compares whether one JSON value @a lhs is less than another JSON value @a
+ rhs according to the following rules:
+ - If @a lhs and @a rhs have the same type, the values are compared using
+ the default `<` operator.
+ - Integer and floating-point numbers are automatically converted before
+ comparison
+ - In case @a lhs and @a rhs have different types, the values are ignored
+ and the order of the types is considered, see
+ @ref operator<(const value_t, const value_t).
+
+ @param[in] lhs first JSON value to consider
+ @param[in] rhs second JSON value to consider
+ @return whether @a lhs is less than @a rhs
+
+ @complexity Linear.
+
+ @liveexample{The example demonstrates comparing several JSON
+ types.,operator__less}
+ */
+ friend bool operator<(const_reference lhs, const_reference rhs) noexcept
+ {
+     const auto left_kind = lhs.type();
+     const auto right_kind = rhs.type();
+
+     // mixed integer/float pairs are compared as floating-point numbers
+     if (left_kind == value_t::number_integer and right_kind == value_t::number_float)
+     {
+         const auto left_number = static_cast<number_float_t>(lhs.m_value.number_integer);
+         return left_number < rhs.m_value.number_float;
+     }
+     if (left_kind == value_t::number_float and right_kind == value_t::number_integer)
+     {
+         const auto right_number = static_cast<number_float_t>(rhs.m_value.number_integer);
+         return lhs.m_value.number_float < right_number;
+     }
+
+     // values of the same type are ordered by their stored payload
+     if (left_kind == right_kind)
+     {
+         switch (left_kind)
+         {
+             case (value_t::null):
+                 return false;
+             case (value_t::boolean):
+                 return lhs.m_value.boolean < rhs.m_value.boolean;
+             case (value_t::number_integer):
+                 return lhs.m_value.number_integer < rhs.m_value.number_integer;
+             case (value_t::number_float):
+                 return lhs.m_value.number_float < rhs.m_value.number_float;
+             case (value_t::string):
+                 return *lhs.m_value.string < *rhs.m_value.string;
+             case (value_t::array):
+                 return *lhs.m_value.array < *rhs.m_value.array;
+             case (value_t::object):
+                 return *lhs.m_value.object < *rhs.m_value.object;
+             case (value_t::discarded):
+                 return false;
+         }
+     }
+
+     // The values themselves could not be compared, so the types are ordered
+     // instead. The operator is called explicitly because MSVC has problems
+     // otherwise.
+     return operator<(left_kind, right_kind);
+ }
+
+ /*!
+ @brief comparison: less than or equal
+
+ Compares whether one JSON value @a lhs is less than or equal to another
+ JSON value by calculating `not (rhs < lhs)`.
+
+ @param[in] lhs first JSON value to consider
+ @param[in] rhs second JSON value to consider
+ @return whether @a lhs is less than or equal to @a rhs
+
+ @complexity Linear.
+
+ @liveexample{The example demonstrates comparing several JSON
+ types.,operator__lessequal}
+ */
+ friend bool operator<=(const_reference lhs, const_reference rhs) noexcept
+ {
+     // lhs <= rhs holds exactly when rhs is not smaller than lhs
+     const bool rhs_is_smaller = (rhs < lhs);
+     return not rhs_is_smaller;
+ }
+
+ /*!
+ @brief comparison: greater than
+
+ Compares whether one JSON value @a lhs is greater than another
+ JSON value by calculating `not (lhs <= rhs)`.
+
+ @param[in] lhs first JSON value to consider
+ @param[in] rhs second JSON value to consider
+ @return whether @a lhs is greater than @a rhs
+
+ @complexity Linear.
+
+ @liveexample{The example demonstrates comparing several JSON
+ types.,operator__greater}
+ */
+ friend bool operator>(const_reference lhs, const_reference rhs) noexcept
+ {
+     // lhs > rhs is defined as the negation of lhs <= rhs
+     const bool lhs_not_greater = (lhs <= rhs);
+     return not lhs_not_greater;
+ }
+
+ /*!
+ @brief comparison: greater than or equal
+
+ Compares whether one JSON value @a lhs is greater than or equal to another
+ JSON value by calculating `not (lhs < rhs)`.
+
+ @param[in] lhs first JSON value to consider
+ @param[in] rhs second JSON value to consider
+ @return whether @a lhs is greater than or equal to @a rhs
+
+ @complexity Linear.
+
+ @liveexample{The example demonstrates comparing several JSON
+ types.,operator__greaterequal}
+ */
+ friend bool operator>=(const_reference lhs, const_reference rhs) noexcept
+ {
+     // lhs >= rhs is defined as the negation of lhs < rhs
+     const bool lhs_is_smaller = (lhs < rhs);
+     return not lhs_is_smaller;
+ }
+
+ /// @}
+
+
+ ///////////////////
+ // serialization //
+ ///////////////////
+
+ /// @name serialization
+ /// @{
+
+ /*!
+ @brief serialize to stream
+
+ Serialize the given JSON value @a j to the output stream @a o. The JSON
+ value will be serialized using the @ref dump member function. The
+ indentation of the output can be controlled with the member variable
+ `width` of the output stream @a o. For instance, using the manipulator
+ `std::setw(4)` on @a o sets the indentation level to `4` and the
+ serialization result is the same as calling `dump(4)`.
+
+ @param[in,out] o stream to serialize to
+ @param[in] j JSON value to serialize
+
+ @return the stream @a o
+
+ @complexity Linear.
+
+ @liveexample{The example below shows the serialization with different
+ parameters to `width` to adjust the indentation level.,operator_serialize}
+ */
+ friend std::ostream& operator<<(std::ostream& o, const basic_json& j)
+ {
+     // width(0) returns the width that was set before and resets it to 0
+     // for subsequent calls to this stream
+     const auto requested_width = o.width(0);
+
+     // a positive width requests pretty-printing with that indentation
+     const bool pretty_print = (requested_width > 0);
+     const auto indentation = (pretty_print ? requested_width : 0);
+
+     // do the actual serialization
+     j.dump(o, pretty_print, static_cast<unsigned int>(indentation));
+     return o;
+ }
+
+ /*!
+ @brief serialize to stream
+ @copydoc operator<<(std::ostream&, const basic_json&)
+ */
+ friend std::ostream& operator>>(const basic_json& j, std::ostream& o)
+ {
+     // mirrored spelling of operator<<: serialize j to o and return the stream
+     o << j;
+     return o;
+ }
+
+ /// @}
+
+
+ /////////////////////
+ // deserialization //
+ /////////////////////
+
+ /// @name deserialization
+ /// @{
+
+ /*!
+ @brief deserialize from string
+
+ @param[in] s string to read a serialized JSON value from
+ @param[in] cb a parser callback function of type @ref parser_callback_t
+ which is used to control the deserialization by filtering unwanted values
+ (optional)
+
+ @return result of the deserialization
+
+ @complexity Linear in the length of the input. The parser is a predictive
+ LL(1) parser. The complexity can be higher if the parser callback function
+ @a cb has a super-linear complexity.
+
+ @liveexample{The example below demonstrates the parse function with and
+ without callback function.,parse__string__parser_callback_t}
+
+ @sa parse(std::istream&, parser_callback_t) for a version that reads from
+ an input stream
+ */
+ static basic_json parse(const string_t& s, parser_callback_t cb = nullptr)
+ {
+     // set up a parser over the string, then run it to completion
+     parser string_parser(s, cb);
+     return string_parser.parse();
+ }
+
+ /*!
+ @brief deserialize from stream
+
+ @param[in,out] i stream to read a serialized JSON value from
+ @param[in] cb a parser callback function of type @ref parser_callback_t
+ which is used to control the deserialization by filtering unwanted values
+ (optional)
+
+ @return result of the deserialization
+
+ @complexity Linear in the length of the input. The parser is a predictive
+ LL(1) parser. The complexity can be higher if the parser callback function
+ @a cb has a super-linear complexity.
+
+ @liveexample{The example below demonstrates the parse function with and
+ without callback function.,parse__istream__parser_callback_t}
+
+ @sa parse(const string_t&, parser_callback_t) for a version that reads
+ from a string
+ */
+ static basic_json parse(std::istream& i, parser_callback_t cb = nullptr)
+ {
+     // set up a parser over the stream, then run it to completion
+     parser stream_parser(i, cb);
+     return stream_parser.parse();
+ }
+
+ /// deserialize from a temporary stream; behaves like the lvalue overload
+ static basic_json parse(std::istream&& i, parser_callback_t cb = nullptr)
+ {
+     // the named rvalue reference is an lvalue here, so the parser binds to
+     // it exactly as it would to an ordinary stream
+     parser stream_parser(i, cb);
+     return stream_parser.parse();
+ }
+
+ /*!
+ @brief deserialize from stream
+
+ Deserializes an input stream to a JSON value.
+
+ @param[in,out] i input stream to read a serialized JSON value from
+ @param[in,out] j JSON value to write the deserialized input to
+
+ @throw std::invalid_argument in case of parse errors
+
+ @complexity Linear in the length of the input. The parser is a predictive
+ LL(1) parser.
+
+ @liveexample{The example below shows how a JSON value is constructed by
+ reading a serialization from a stream.,operator_deserialize}
+
+ @sa parse(std::istream&, parser_callback_t) for a variant with a parser
+ callback function to filter values while parsing
+ */
+ friend std::istream& operator<<(basic_json& j, std::istream& i)
+ {
+     // parse without a callback and replace the previous content of j
+     parser stream_parser(i);
+     j = stream_parser.parse();
+     return i;
+ }
+
+ /*!
+ @brief deserialize from stream
+ @copydoc operator<<(basic_json&, std::istream&)
+ */
+ friend std::istream& operator>>(std::istream& i, basic_json& j)
+ {
+     // parse without a callback and replace the previous content of j
+     parser stream_parser(i);
+     j = stream_parser.parse();
+     return i;
+ }
+
+ /// @}
+
+
+ private:
+ ///////////////////////////
+ // convenience functions //
+ ///////////////////////////
+
+ /// return the type as string
+ string_t type_name() const
+ {
+     // map the stored type tag to a human-readable name
+     if (m_type == value_t::null)
+     {
+         return "null";
+     }
+     if (m_type == value_t::object)
+     {
+         return "object";
+     }
+     if (m_type == value_t::array)
+     {
+         return "array";
+     }
+     if (m_type == value_t::string)
+     {
+         return "string";
+     }
+     if (m_type == value_t::boolean)
+     {
+         return "boolean";
+     }
+     if (m_type == value_t::discarded)
+     {
+         return "discarded";
+     }
+
+     // every remaining tag is reported as a number
+     return "number";
+ }
+
+ /*!
+ @brief calculates the extra space to escape a JSON string
+
+ @param[in] s the string to escape
+ @return the number of characters required to escape string @a s
+
+ @complexity Linear in the length of string @a s.
+ */
+ static std::size_t extra_space(const string_t& s) noexcept
+ {
+     std::size_t additional = 0;
+
+     for (const auto& ch : s)
+     {
+         // characters with a dedicated two-character escape (\" \\ \b \f \n \r \t)
+         const bool has_short_escape =
+             (ch == '"') or (ch == '\\') or (ch == '\b') or (ch == '\f') or
+             (ch == '\n') or (ch == '\r') or (ch == '\t');
+
+         if (has_short_escape)
+         {
+             // one byte becomes two
+             additional += 1;
+         }
+         else if (ch >= 0x00 and ch <= 0x1f)
+         {
+             // any other control character becomes \uxxxx: one byte becomes six
+             additional += 5;
+         }
+     }
+
+     return additional;
+ }
+
+ /*!
+ @brief escape a string
+
+ Escape a string by replacing certain special characters by a sequence of an
+ escape character (backslash) and another character and other control
+ characters by a sequence of "\u" followed by a four-digit hex
+ representation.
+
+ @param[in] s the string to escape
+ @return the escaped string
+
+ @complexity Linear in the length of string @a s.
+ */
+ static string_t escape_string(const string_t& s) noexcept
+ {
+     const auto space = extra_space(s);
+     if (space == 0)
+     {
+         // nothing needs escaping; hand back an unchanged copy
+         return s;
+     }
+
+     // lookup table for the two variable hex digits of a \u00xx escape
+     static const char hex_digits[] = "0123456789abcdef";
+
+     // The result is pre-filled with backslashes, so every escape sequence
+     // only has to write the characters that follow its backslash.
+     string_t result(s.size() + space, '\\');
+     std::size_t pos = 0;
+
+     for (const auto& c : s)
+     {
+         switch (c)
+         {
+             // quotation mark (0x22)
+             case '"':
+             {
+                 result[pos + 1] = '"';
+                 pos += 2;
+                 break;
+             }
+
+             // reverse solidus (0x5c)
+             case '\\':
+             {
+                 // both characters are already backslashes
+                 pos += 2;
+                 break;
+             }
+
+             // backspace (0x08)
+             case '\b':
+             {
+                 result[pos + 1] = 'b';
+                 pos += 2;
+                 break;
+             }
+
+             // formfeed (0x0c)
+             case '\f':
+             {
+                 result[pos + 1] = 'f';
+                 pos += 2;
+                 break;
+             }
+
+             // newline (0x0a)
+             case '\n':
+             {
+                 result[pos + 1] = 'n';
+                 pos += 2;
+                 break;
+             }
+
+             // carriage return (0x0d)
+             case '\r':
+             {
+                 result[pos + 1] = 'r';
+                 pos += 2;
+                 break;
+             }
+
+             // horizontal tab (0x09)
+             case '\t':
+             {
+                 result[pos + 1] = 't';
+                 pos += 2;
+                 break;
+             }
+
+             default:
+             {
+                 if (c >= 0x00 and c <= 0x1f)
+                 {
+                     // Emit \u00xx by hand. The earlier sprintf-based code
+                     // wrote a terminating NUL one past the sequence and then
+                     // replaced it with a backslash; when the control
+                     // character was the last input character, that store hit
+                     // result[result.size()], which is undefined behaviour.
+                     result[pos + 1] = 'u';
+                     result[pos + 2] = '0';
+                     result[pos + 3] = '0';
+                     result[pos + 4] = hex_digits[(c >> 4) & 0x0f];
+                     result[pos + 5] = hex_digits[c & 0x0f];
+                     pos += 6;
+                 }
+                 else
+                 {
+                     // all other characters are copied as-is
+                     result[pos++] = c;
+                 }
+                 break;
+             }
+         }
+     }
+
+     return result;
+ }
+
+ /*!
+ @brief internal implementation of the serialization function
+
+ This function is called by the public member function dump and organizes
+ the serialization internally. The indentation level is propagated as
+ additional parameter. In case of arrays and objects, the function is called
+ recursively. Note that
+
+ - strings and object keys are escaped using escape_string()
+ - integer numbers are converted implicitly via operator<<
+ - floating-point numbers are converted to a string using "%g" format
+
+ @param[out] o stream to write to
+ @param[in] pretty_print whether the output shall be pretty-printed
+ @param[in] indent_step the indent level
+ @param[in] current_indent the current indent level (only used internally)
+ */
+ void dump(std::ostream& o, const bool pretty_print, const unsigned int indent_step,
+           const unsigned int current_indent = 0) const
+ {
+     // indentation used for nested elements; grows only when pretty-printing
+     unsigned int new_indent = current_indent;
+
+     switch (m_type)
+     {
+         case (value_t::object):
+         {
+             if (m_value.object->empty())
+             {
+                 o << "{}";
+                 return;
+             }
+
+             o << "{";
+
+             // increase indentation
+             if (pretty_print)
+             {
+                 new_indent += indent_step;
+                 o << "\n";
+             }
+
+             // built once instead of once per member
+             const string_t member_indent(new_indent, ' ');
+
+             for (auto i = m_value.object->cbegin(); i != m_value.object->cend(); ++i)
+             {
+                 if (i != m_value.object->cbegin())
+                 {
+                     o << (pretty_print ? ",\n" : ",");
+                 }
+                 o << member_indent << "\""
+                   << escape_string(i->first) << "\":"
+                   << (pretty_print ? " " : "");
+                 i->second.dump(o, pretty_print, indent_step, new_indent);
+             }
+
+             // decrease indentation
+             if (pretty_print)
+             {
+                 new_indent -= indent_step;
+                 o << "\n";
+             }
+
+             o << string_t(new_indent, ' ') << "}";
+             return;
+         }
+
+         case (value_t::array):
+         {
+             if (m_value.array->empty())
+             {
+                 o << "[]";
+                 return;
+             }
+
+             o << "[";
+
+             // increase indentation
+             if (pretty_print)
+             {
+                 new_indent += indent_step;
+                 o << "\n";
+             }
+
+             // built once instead of once per element
+             const string_t element_indent(new_indent, ' ');
+
+             for (auto i = m_value.array->cbegin(); i != m_value.array->cend(); ++i)
+             {
+                 if (i != m_value.array->cbegin())
+                 {
+                     o << (pretty_print ? ",\n" : ",");
+                 }
+                 o << element_indent;
+                 i->dump(o, pretty_print, indent_step, new_indent);
+             }
+
+             // decrease indentation
+             if (pretty_print)
+             {
+                 new_indent -= indent_step;
+                 o << "\n";
+             }
+
+             o << string_t(new_indent, ' ') << "]";
+             return;
+         }
+
+         case (value_t::string):
+         {
+             o << string_t("\"") << escape_string(*m_value.string) << "\"";
+             return;
+         }
+
+         case (value_t::boolean):
+         {
+             o << (m_value.boolean ? "true" : "false");
+             return;
+         }
+
+         case (value_t::number_integer):
+         {
+             o << m_value.number_integer;
+             return;
+         }
+
+         case (value_t::number_float):
+         {
+             // Print with digits10 significant digits, read from
+             // std::numeric_limits<number_float_t>. precision() returns the
+             // previous setting, which is restored afterwards so serializing
+             // a JSON value does not permanently change how the caller's
+             // stream formats its own floating-point output.
+             const auto saved_precision =
+                 o.precision(std::numeric_limits<number_float_t>::digits10);
+             o << m_value.number_float;
+             o.precision(saved_precision);
+             return;
+         }
+
+         case (value_t::discarded):
+         {
+             o << "<discarded>";
+             return;
+         }
+
+         default:
+         {
+             o << "null";
+             return;
+         }
+     }
+ }
+
+ private:
+ //////////////////////
+ // member variables //
+ //////////////////////
+
+ /// the type of the current element
+ value_t m_type = value_t::null;
+
+ /// the value of the current element
+ json_value m_value = {};
+
+
+ private:
+ ///////////////
+ // iterators //
+ ///////////////
+
+ /*!
+ @brief an iterator for primitive JSON types
+
+ This class models an iterator for primitive JSON types (boolean, number,
+ string). Its only purpose is to allow the iterator/const_iterator classes
+ to "iterate" over primitive values. Internally, the iterator is modeled by
+ a `difference_type` variable. Value begin_value (`0`) models the begin,
+ end_value (`1`) models past the end.
+ */
+ class primitive_iterator_t
+ {
+   private:
+     /// position that stands for "points at the value"
+     static constexpr difference_type begin_value = 0;
+     /// position that stands for "one past the value"
+     static constexpr difference_type end_value = begin_value + 1;
+
+     /// current position; starts at a sentinel that is neither begin nor end
+     difference_type m_it = std::numeric_limits<std::ptrdiff_t>::min();
+
+   public:
+     /// move to the begin position
+     void set_begin()
+     {
+         m_it = begin_value;
+     }
+
+     /// move to the past-the-end position
+     void set_end()
+     {
+         m_it = end_value;
+     }
+
+     /// true if the iterator is at the begin position (i.e. dereferenceable)
+     bool is_begin() const
+     {
+         return m_it == begin_value;
+     }
+
+     /// true if the iterator is at the past-the-end position
+     bool is_end() const
+     {
+         return m_it == end_value;
+     }
+
+     /// mutable access to the position, used for arithmetic and comparison
+     operator difference_type& ()
+     {
+         return m_it;
+     }
+
+     /// read-only copy of the position, used for comparison
+     operator difference_type () const
+     {
+         return m_it;
+     }
+ };
+
+ /*!
+ @brief an iterator value
+
+ @note This structure could easily be a union, but MSVC currently does not
+ allow unions members with complex constructors, see
+ https://github.com/nlohmann/json/pull/105.
+ */
+ struct internal_iterator
+ {
+     /// position inside a JSON object
+     typename object_t::iterator object_iterator;
+     /// position inside a JSON array
+     typename array_t::iterator array_iterator;
+     /// stand-in position for every other value type
+     primitive_iterator_t primitive_iterator;
+
+     /// value-initialize all three members; none refers to an element yet
+     internal_iterator()
+         : object_iterator(),
+           array_iterator(),
+           primitive_iterator()
+     {}
+ };
+
+ public:
+ /*!
+ @brief a const random access iterator for the @ref basic_json class
+
+ This class implements a const iterator for the @ref basic_json class. From
+ this class, the @ref iterator class is derived.
+
+ @requirement The class satisfies the following concept requirements:
+ - [RandomAccessIterator](http://en.cppreference.com/w/cpp/concept/RandomAccessIterator):
+ The iterator that can be moved to point (forward and backward) to any
+ element in constant time.
+ */
+ class const_iterator : public std::iterator<std::random_access_iterator_tag, const basic_json>
+ {
+     /// allow basic_json to access private members
+     friend class basic_json;
+
+   public:
+     /// the type of the values when the iterator is dereferenced
+     using value_type = typename basic_json::value_type;
+     /// a type to represent differences between iterators
+     using difference_type = typename basic_json::difference_type;
+     /// defines a pointer to the type iterated over (value_type)
+     using pointer = typename basic_json::const_pointer;
+     /// defines a reference to the type iterated over (value_type)
+     using reference = typename basic_json::const_reference;
+     /// the category of the iterator
+     /// NOTE(review): advertised as bidirectional although the base class
+     /// names random access; object iterators throw on +=, -, [] and <, so
+     /// this looks deliberate - confirm before changing
+     using iterator_category = std::bidirectional_iterator_tag;
+
+     /// default constructor; the iterator is not associated with any value
+     const_iterator() = default;
+
+     /// constructor for a given JSON instance (@a object must not be null)
+     const_iterator(pointer object) : m_object(object)
+     {
+         switch (m_object->m_type)
+         {
+             case (basic_json::value_t::object):
+             {
+                 m_it.object_iterator = typename object_t::iterator();
+                 break;
+             }
+             case (basic_json::value_t::array):
+             {
+                 m_it.array_iterator = typename array_t::iterator();
+                 break;
+             }
+             default:
+             {
+                 m_it.primitive_iterator = primitive_iterator_t();
+                 break;
+             }
+         }
+     }
+
+     /// copy constructor given a nonconst iterator
+     const_iterator(const iterator& other) : m_object(other.m_object)
+     {
+         // A default-constructed iterator has no associated value. There is
+         // nothing to copy then, and dereferencing m_object would be
+         // undefined behaviour.
+         if (m_object == nullptr)
+         {
+             return;
+         }
+
+         switch (m_object->m_type)
+         {
+             case (basic_json::value_t::object):
+             {
+                 m_it.object_iterator = other.m_it.object_iterator;
+                 break;
+             }
+
+             case (basic_json::value_t::array):
+             {
+                 m_it.array_iterator = other.m_it.array_iterator;
+                 break;
+             }
+
+             default:
+             {
+                 m_it.primitive_iterator = other.m_it.primitive_iterator;
+                 break;
+             }
+         }
+     }
+
+     /// copy constructor
+     const_iterator(const const_iterator& other) noexcept
+         : m_object(other.m_object), m_it(other.m_it)
+     {}
+
+     /// copy assignment (copy-and-swap: @a other is already a copy)
+     const_iterator& operator=(const_iterator other) noexcept(
+         std::is_nothrow_move_constructible<pointer>::value and
+         std::is_nothrow_move_assignable<pointer>::value and
+         std::is_nothrow_move_constructible<internal_iterator>::value and
+         std::is_nothrow_move_assignable<internal_iterator>::value
+     )
+     {
+         std::swap(m_object, other.m_object);
+         std::swap(m_it, other.m_it);
+         return *this;
+     }
+
+   private:
+     /// set the iterator to the first value
+     void set_begin()
+     {
+         switch (m_object->m_type)
+         {
+             case (basic_json::value_t::object):
+             {
+                 m_it.object_iterator = m_object->m_value.object->begin();
+                 break;
+             }
+
+             case (basic_json::value_t::array):
+             {
+                 m_it.array_iterator = m_object->m_value.array->begin();
+                 break;
+             }
+
+             case (basic_json::value_t::null):
+             {
+                 // set to end so begin()==end() is true: null is empty
+                 m_it.primitive_iterator.set_end();
+                 break;
+             }
+
+             default:
+             {
+                 m_it.primitive_iterator.set_begin();
+                 break;
+             }
+         }
+     }
+
+     /// set the iterator past the last value
+     void set_end()
+     {
+         switch (m_object->m_type)
+         {
+             case (basic_json::value_t::object):
+             {
+                 m_it.object_iterator = m_object->m_value.object->end();
+                 break;
+             }
+
+             case (basic_json::value_t::array):
+             {
+                 m_it.array_iterator = m_object->m_value.array->end();
+                 break;
+             }
+
+             default:
+             {
+                 m_it.primitive_iterator.set_end();
+                 break;
+             }
+         }
+     }
+
+   public:
+     /// return a reference to the value pointed to by the iterator
+     /// @throw std::out_of_range for null values and for primitive iterators
+     /// that are not at the begin position
+     reference operator*() const
+     {
+         switch (m_object->m_type)
+         {
+             case (basic_json::value_t::object):
+             {
+                 return m_it.object_iterator->second;
+             }
+
+             case (basic_json::value_t::array):
+             {
+                 return *m_it.array_iterator;
+             }
+
+             case (basic_json::value_t::null):
+             {
+                 throw std::out_of_range("cannot get value");
+             }
+
+             default:
+             {
+                 if (m_it.primitive_iterator.is_begin())
+                 {
+                     return *m_object;
+                 }
+                 else
+                 {
+                     throw std::out_of_range("cannot get value");
+                 }
+             }
+         }
+     }
+
+     /// dereference the iterator
+     /// @throw std::out_of_range for non-container iterators that are not at
+     /// the begin position
+     pointer operator->() const
+     {
+         switch (m_object->m_type)
+         {
+             case (basic_json::value_t::object):
+             {
+                 return &(m_it.object_iterator->second);
+             }
+
+             case (basic_json::value_t::array):
+             {
+                 return &*m_it.array_iterator;
+             }
+
+             default:
+             {
+                 if (m_it.primitive_iterator.is_begin())
+                 {
+                     return m_object;
+                 }
+                 else
+                 {
+                     throw std::out_of_range("cannot get value");
+                 }
+             }
+         }
+     }
+
+     /// post-increment (it++)
+     const_iterator operator++(int)
+     {
+         auto result = *this;
+         ++(*this);
+
+         return result;
+     }
+
+     /// pre-increment (++it)
+     const_iterator& operator++()
+     {
+         switch (m_object->m_type)
+         {
+             case (basic_json::value_t::object):
+             {
+                 ++m_it.object_iterator;
+                 break;
+             }
+
+             case (basic_json::value_t::array):
+             {
+                 ++m_it.array_iterator;
+                 break;
+             }
+
+             default:
+             {
+                 ++m_it.primitive_iterator;
+                 break;
+             }
+         }
+
+         return *this;
+     }
+
+     /// post-decrement (it--)
+     const_iterator operator--(int)
+     {
+         auto result = *this;
+         --(*this);
+
+         return result;
+     }
+
+     /// pre-decrement (--it)
+     const_iterator& operator--()
+     {
+         switch (m_object->m_type)
+         {
+             case (basic_json::value_t::object):
+             {
+                 --m_it.object_iterator;
+                 break;
+             }
+
+             case (basic_json::value_t::array):
+             {
+                 --m_it.array_iterator;
+                 break;
+             }
+
+             default:
+             {
+                 --m_it.primitive_iterator;
+                 break;
+             }
+         }
+
+         return *this;
+     }
+
+     /// comparison: equal
+     /// @throw std::domain_error if the iterators belong to different values
+     bool operator==(const const_iterator& other) const
+     {
+         // if objects are not the same, the comparison is undefined
+         if (m_object != other.m_object)
+         {
+             throw std::domain_error("cannot compare iterators of different containers");
+         }
+
+         switch (m_object->m_type)
+         {
+             case (basic_json::value_t::object):
+             {
+                 return (m_it.object_iterator == other.m_it.object_iterator);
+             }
+
+             case (basic_json::value_t::array):
+             {
+                 return (m_it.array_iterator == other.m_it.array_iterator);
+             }
+
+             default:
+             {
+                 return (m_it.primitive_iterator == other.m_it.primitive_iterator);
+             }
+         }
+     }
+
+     /// comparison: not equal
+     bool operator!=(const const_iterator& other) const
+     {
+         return not operator==(other);
+     }
+
+     /// comparison: smaller
+     /// @throw std::domain_error if the iterators belong to different values
+     /// or refer to an object
+     bool operator<(const const_iterator& other) const
+     {
+         // if objects are not the same, the comparison is undefined
+         if (m_object != other.m_object)
+         {
+             throw std::domain_error("cannot compare iterators of different containers");
+         }
+
+         switch (m_object->m_type)
+         {
+             case (basic_json::value_t::object):
+             {
+                 throw std::domain_error("cannot use operator< for object iterators");
+             }
+
+             case (basic_json::value_t::array):
+             {
+                 return (m_it.array_iterator < other.m_it.array_iterator);
+             }
+
+             default:
+             {
+                 return (m_it.primitive_iterator < other.m_it.primitive_iterator);
+             }
+         }
+     }
+
+     /// comparison: less than or equal
+     bool operator<=(const const_iterator& other) const
+     {
+         return not other.operator < (*this);
+     }
+
+     /// comparison: greater than
+     bool operator>(const const_iterator& other) const
+     {
+         return not operator<=(other);
+     }
+
+     /// comparison: greater than or equal
+     bool operator>=(const const_iterator& other) const
+     {
+         return not operator<(other);
+     }
+
+     /// add to iterator
+     /// @throw std::domain_error for object iterators
+     const_iterator& operator+=(difference_type i)
+     {
+         switch (m_object->m_type)
+         {
+             case (basic_json::value_t::object):
+             {
+                 throw std::domain_error("cannot use operator+= for object iterators");
+             }
+
+             case (basic_json::value_t::array):
+             {
+                 m_it.array_iterator += i;
+                 break;
+             }
+
+             default:
+             {
+                 m_it.primitive_iterator += i;
+                 break;
+             }
+         }
+
+         return *this;
+     }
+
+     /// subtract from iterator
+     const_iterator& operator-=(difference_type i)
+     {
+         return operator+=(-i);
+     }
+
+     /// add to iterator (const-qualified: works on a copy, leaves *this alone)
+     const_iterator operator+(difference_type i) const
+     {
+         auto result = *this;
+         result += i;
+         return result;
+     }
+
+     /// subtract from iterator (const-qualified: works on a copy)
+     const_iterator operator-(difference_type i) const
+     {
+         auto result = *this;
+         result -= i;
+         return result;
+     }
+
+     /// return difference
+     /// @throw std::domain_error for object iterators
+     difference_type operator-(const const_iterator& other) const
+     {
+         switch (m_object->m_type)
+         {
+             case (basic_json::value_t::object):
+             {
+                 throw std::domain_error("cannot use operator- for object iterators");
+             }
+
+             case (basic_json::value_t::array):
+             {
+                 return m_it.array_iterator - other.m_it.array_iterator;
+             }
+
+             default:
+             {
+                 return m_it.primitive_iterator - other.m_it.primitive_iterator;
+             }
+         }
+     }
+
+     /// access to successor
+     /// @throw std::domain_error for object iterators
+     /// @throw std::out_of_range for null values and for primitive iterators
+     /// where position + n is not the begin position
+     reference operator[](difference_type n) const
+     {
+         switch (m_object->m_type)
+         {
+             case (basic_json::value_t::object):
+             {
+                 throw std::domain_error("cannot use operator[] for object iterators");
+             }
+
+             case (basic_json::value_t::array):
+             {
+                 return *(m_it.array_iterator + n);
+             }
+
+             case (basic_json::value_t::null):
+             {
+                 throw std::out_of_range("cannot get value");
+             }
+
+             default:
+             {
+                 // it[n] is *(it + n); the only dereferenceable position
+                 // is 0, so the current position must equal -n
+                 if (m_it.primitive_iterator == -n)
+                 {
+                     return *m_object;
+                 }
+                 else
+                 {
+                     throw std::out_of_range("cannot get value");
+                 }
+             }
+         }
+     }
+
+     /// return the key of an object iterator
+     /// @throw std::domain_error if the iterator does not refer to an object
+     typename object_t::key_type key() const
+     {
+         switch (m_object->m_type)
+         {
+             case (basic_json::value_t::object):
+             {
+                 return m_it.object_iterator->first;
+             }
+
+             default:
+             {
+                 throw std::domain_error("cannot use key() for non-object iterators");
+             }
+         }
+     }
+
+     /// return the value of an iterator
+     reference value() const
+     {
+         return operator*();
+     }
+
+   private:
+     /// associated JSON instance
+     pointer m_object = nullptr;
+     /// the actual iterator of the associated instance
+     internal_iterator m_it = internal_iterator();
+ };
+
+ /*!
+ @brief a mutable random access iterator for the @ref basic_json class
+
+ @requirement The class satisfies the following concept requirements:
+ - [RandomAccessIterator](http://en.cppreference.com/w/cpp/concept/RandomAccessIterator):
+ The iterator that can be moved to point (forward and backward) to any
+ element in constant time.
+ - [OutputIterator](http://en.cppreference.com/w/cpp/concept/OutputIterator):
+ It is possible to write to the pointed-to element.
+ */
+ class iterator : public const_iterator
+ {
+   public:
+     using base_iterator = const_iterator;
+     using pointer = typename basic_json::pointer;
+     using reference = typename basic_json::reference;
+
+     /// default constructor; the iterator is not associated with any value
+     iterator() = default;
+
+     /// constructor for a given JSON instance
+     iterator(pointer object) noexcept : base_iterator(object)
+     {}
+
+     /// copy constructor
+     iterator(const iterator& other) noexcept
+         // The cast selects the base class's own copy constructor. Passing
+         // `other` directly would pick const_iterator(const iterator&), which
+         // inspects the associated value and therefore must not be used for
+         // iterators that are not associated with one.
+         : base_iterator(static_cast<const base_iterator&>(other))
+     {}
+
+     /// copy assignment
+     iterator& operator=(iterator other) noexcept(
+         std::is_nothrow_move_constructible<pointer>::value and
+         std::is_nothrow_move_assignable<pointer>::value and
+         std::is_nothrow_move_constructible<internal_iterator>::value and
+         std::is_nothrow_move_assignable<internal_iterator>::value
+     )
+     {
+         // same reasoning as in the copy constructor: copy via the base type
+         base_iterator::operator=(static_cast<const base_iterator&>(other));
+         return *this;
+     }
+
+     /// return a reference to the value pointed to by the iterator
+     reference operator*()
+     {
+         // the base hands out a const reference; this class is the mutable view
+         return const_cast<reference>(base_iterator::operator*());
+     }
+
+     /// dereference the iterator
+     pointer operator->()
+     {
+         return const_cast<pointer>(base_iterator::operator->());
+     }
+
+     /// post-increment (it++)
+     iterator operator++(int)
+     {
+         iterator result = *this;
+         base_iterator::operator++();
+         return result;
+     }
+
+     /// pre-increment (++it)
+     iterator& operator++()
+     {
+         base_iterator::operator++();
+         return *this;
+     }
+
+     /// post-decrement (it--)
+     iterator operator--(int)
+     {
+         iterator result = *this;
+         base_iterator::operator--();
+         return result;
+     }
+
+     /// pre-decrement (--it)
+     iterator& operator--()
+     {
+         base_iterator::operator--();
+         return *this;
+     }
+
+     /// add to iterator
+     iterator& operator+=(difference_type i)
+     {
+         base_iterator::operator+=(i);
+         return *this;
+     }
+
+     /// subtract from iterator
+     iterator& operator-=(difference_type i)
+     {
+         base_iterator::operator-=(i);
+         return *this;
+     }
+
+     /// add to iterator (const-qualified: works on a copy, leaves *this alone)
+     iterator operator+(difference_type i) const
+     {
+         auto result = *this;
+         result += i;
+         return result;
+     }
+
+     /// subtract from iterator (const-qualified: works on a copy)
+     iterator operator-(difference_type i) const
+     {
+         auto result = *this;
+         result -= i;
+         return result;
+     }
+
+     /// return difference
+     difference_type operator-(const iterator& other) const
+     {
+         return base_iterator::operator-(other);
+     }
+
+     /// access to successor
+     reference operator[](difference_type n) const
+     {
+         return const_cast<reference>(base_iterator::operator[](n));
+     }
+
+     /// return the value of an iterator
+     reference value() const
+     {
+         return const_cast<reference>(base_iterator::value());
+     }
+ };
+
+ /*!
+ @brief a template for a reverse iterator class
+
+ @tparam Base the base iterator type to reverse. Valid types are @ref
+ iterator (to create @ref reverse_iterator) and @ref const_iterator (to
+ create @ref const_reverse_iterator).
+
+ @requirement The class satisfies the following concept requirements:
+ - [RandomAccessIterator](http://en.cppreference.com/w/cpp/concept/RandomAccessIterator):
+ The iterator that can be moved to point (forward and backward) to any
+ element in constant time.
+ - [OutputIterator](http://en.cppreference.com/w/cpp/concept/OutputIterator):
+ It is possible to write to the pointed-to element (only if @a Base is
+ @ref iterator).
+ */
+ template<typename Base>
+ class json_reverse_iterator : public std::reverse_iterator<Base>
+ {
+   public:
+     /// shortcut to the reverse iterator adaptor
+     using base_iterator = std::reverse_iterator<Base>;
+     /// the reference type for the pointed-to element
+     using reference = typename Base::reference;
+
+     /// create reverse iterator from iterator
+     json_reverse_iterator(const typename base_iterator::iterator_type& it)
+         : base_iterator(it) {}
+
+     /// create reverse iterator from base class
+     json_reverse_iterator(const base_iterator& it) : base_iterator(it) {}
+
+     /// post-increment (it++)
+     json_reverse_iterator operator++(int)
+     {
+         // the adaptor's result converts back through the constructor above
+         return base_iterator::operator++(1);
+     }
+
+     /// pre-increment (++it)
+     json_reverse_iterator& operator++()
+     {
+         base_iterator::operator++();
+         return *this;
+     }
+
+     /// post-decrement (it--)
+     json_reverse_iterator operator--(int)
+     {
+         return base_iterator::operator--(1);
+     }
+
+     /// pre-decrement (--it)
+     json_reverse_iterator& operator--()
+     {
+         base_iterator::operator--();
+         return *this;
+     }
+
+     /// add to iterator
+     json_reverse_iterator& operator+=(difference_type i)
+     {
+         base_iterator::operator+=(i);
+         return *this;
+     }
+
+     /// add to iterator
+     json_reverse_iterator operator+(difference_type i) const
+     {
+         auto result = *this;
+         result += i;
+         return result;
+     }
+
+     /// subtract from iterator
+     json_reverse_iterator operator-(difference_type i) const
+     {
+         auto result = *this;
+         result -= i;
+         return result;
+     }
+
+     /// return difference
+     difference_type operator-(const json_reverse_iterator& other) const
+     {
+         // A reverse iterator advances while its underlying iterator moves
+         // backwards, so the operands swap: this matches the definition of
+         // operator- for std::reverse_iterator (rhs.base() - lhs.base()) and
+         // makes rend() - rbegin() equal to the number of elements.
+         return other.base() - this->base();
+     }
+
+     /// access to successor
+     reference operator[](difference_type n) const
+     {
+         return *(this->operator+(n));
+     }
+
+     /// return the key of an object iterator
+     typename object_t::key_type key() const
+     {
+         // the element referred to sits one position before base()
+         auto it = --this->base();
+         return it.key();
+     }
+
+     /// return the value of an iterator
+     reference value() const
+     {
+         // the element referred to sits one position before base()
+         auto it = --this->base();
+         return it.operator * ();
+     }
+ };
+
+ /*!
+ @brief wrapper to access iterator member functions in range-based for
+
+ This class allows to access @ref key() and @ref value() during range-based
+ for loops. In these loops, a reference to the JSON values is returned, so
+ there is no access to the underlying iterator.
+ */
+ class iterator_wrapper
+ {
+   private:
+     /// the JSON value whose elements are visited
+     basic_json& container;
+     /// iterator type obtained from the container
+     using json_iterator = decltype(std::begin(container));
+
+     /// the object a range-based for loop actually steps through
+     class iterator_wrapper_internal
+     {
+       private:
+         /// current position in the container
+         json_iterator anchor;
+         /// number of increments so far; serves as the key for arrays
+         size_t array_index = 0;
+
+       public:
+         /// wrap the given position
+         iterator_wrapper_internal(json_iterator i) : anchor(i)
+         {}
+
+         /// dereferencing yields the wrapper itself, so the loop variable
+         /// offers key() and value()
+         iterator_wrapper_internal& operator*()
+         {
+             return *this;
+         }
+
+         /// step to the next element and keep the array index in sync
+         iterator_wrapper_internal& operator++()
+         {
+             ++anchor;
+             ++array_index;
+
+             return *this;
+         }
+
+         /// two wrappers differ when their positions differ
+         bool operator!= (const iterator_wrapper_internal& o)
+         {
+             return anchor != o.anchor;
+         }
+
+         /// streaming a wrapper streams the value it refers to
+         friend std::ostream& operator<<(std::ostream& o, const iterator_wrapper_internal& w)
+         {
+             return o << w.value();
+         }
+
+         /// key of the current element
+         typename basic_json::string_t key() const
+         {
+             const auto container_type = anchor.m_object->type();
+
+             // arrays: the running index rendered as a decimal string
+             if (container_type == value_t::array)
+             {
+                 return std::to_string(array_index);
+             }
+
+             // objects: the key stored alongside the value
+             if (container_type == value_t::object)
+             {
+                 return anchor.key();
+             }
+
+             // primitive values have no key
+             return "";
+         }
+
+         /// value of the current element
+         typename json_iterator::reference value() const
+         {
+             return anchor.value();
+         }
+     };
+
+   public:
+     /// wrap the given container
+     iterator_wrapper(basic_json& cont)
+         : container(cont)
+     {}
+
+     /// first position (needed for range-based for)
+     iterator_wrapper_internal begin()
+     {
+         return iterator_wrapper_internal(container.begin());
+     }
+
+     /// past-the-end position (needed for range-based for)
+     iterator_wrapper_internal end()
+     {
+         return iterator_wrapper_internal(container.end());
+     }
+ };
+
+ private:
+ //////////////////////
+ // lexer and parser //
+ //////////////////////
+
+ /*!
+ @brief lexical analysis
+
+ This class organizes the lexical analysis during JSON deserialization. The
+ core of it is a scanner generated by re2c <http://re2c.org> that processes
+ a buffer and recognizes tokens according to RFC 7159.
+ */
+ class lexer
+ {
+ public:
+ /// token types for the parser
+ ///
+ /// Besides the JSON literals, values and structural characters, the
+ /// enumeration carries three bookkeeping states: `uninitialized`,
+ /// `parse_error` and `end_of_input`.
+ enum class token_type
+ {
+ uninitialized, ///< indicating the scanner is uninitialized
+ literal_true, ///< the "true" literal
+ literal_false, ///< the "false" literal
+ literal_null, ///< the "null" literal
+ value_string, ///< a string - use get_string() for actual value
+ value_number, ///< a number - use get_number() for actual value
+ begin_array, ///< the character for array begin "["
+ begin_object, ///< the character for object begin "{"
+ end_array, ///< the character for array end "]"
+ end_object, ///< the character for object end "}"
+ name_separator, ///< the name separator ":"
+ value_separator, ///< the value separator ","
+ parse_error, ///< indicating a parse error
+ end_of_input ///< indicating the end of the input buffer
+ };
+
+ /// the char type to use in the lexer
+ using lexer_char_t = unsigned char;
+
+ /// constructor with a given buffer
+ explicit lexer(const string_t& s) noexcept
+     : m_stream(nullptr), m_buffer(s)
+ {
+     // Scan the lexer's own copy of the input rather than the caller's
+     // string. m_buffer lives as long as the lexer, so the pointers below
+     // stay valid even if the argument was a temporary that is destroyed
+     // right after construction.
+     m_content = reinterpret_cast<const lexer_char_t*>(m_buffer.c_str());
+     m_start = m_cursor = m_content;
+     m_limit = m_content + m_buffer.size();
+ }
+ explicit lexer(std::istream* s) noexcept
+     : m_stream(s), m_buffer()
+ {
+     // prime the buffer with the first line of the stream
+     getline(*m_stream, m_buffer);
+
+     // all scan pointers start at the beginning of the buffered line;
+     // the limit sits one past its last character
+     m_content = reinterpret_cast<const lexer_char_t*>(m_buffer.c_str());
+     m_cursor = m_content;
+     m_start = m_cursor;
+     m_limit = m_content + m_buffer.size();
+ }
+
+ /// default constructor
+ lexer() = default;
+
+ // switch off unwanted functions
+ lexer(const lexer&) = delete;
+ lexer operator=(const lexer&) = delete;
+
+ /*!
+ @brief create a string from a Unicode code point
+
+ @param[in] codepoint1 the code point (can be high surrogate)
+ @param[in] codepoint2 the code point (can be low surrogate or 0)
+ @return string representation of the code point
+ @throw std::out_of_range if code point is >0x10ffff
+ @throw std::invalid_argument if the low surrogate is invalid
+
+ @see <http://en.wikipedia.org/wiki/UTF-8#Sample_code>
+ */
+ static string_t to_unicode(const std::size_t codepoint1,
+                            const std::size_t codepoint2 = 0)
+ {
+     using char_type = typename string_t::value_type;
+
+     // start from the first code point; a surrogate pair replaces it below
+     std::size_t codepoint = codepoint1;
+
+     const bool is_high_surrogate = (codepoint1 >= 0xD800 and codepoint1 <= 0xDBFF);
+     if (is_high_surrogate)
+     {
+         const bool is_low_surrogate = (codepoint2 >= 0xDC00 and codepoint2 <= 0xDFFF);
+         if (not is_low_surrogate)
+         {
+             throw std::invalid_argument("missing or wrong low surrogate");
+         }
+
+         // Combine both halves: shift the high surrogate by 10 bits, add the
+         // low surrogate, and remove the surrogate offsets in one step:
+         // (0xD800 << 10) + 0xDC00 - 0x10000 = 0x35FDC00
+         codepoint = (codepoint1 << 10) + codepoint2 - 0x35FDC00;
+     }
+
+     if (codepoint > 0x10ffff)
+     {
+         throw std::out_of_range("code points above 0x10FFFF are invalid");
+     }
+
+     string_t result;
+
+     // append a single encoded byte to the result
+     const auto emit = [&result](const std::size_t byte)
+     {
+         result.append(1, static_cast<char_type>(byte));
+     };
+
+     if (codepoint < 0x80)
+     {
+         // 1-byte characters: 0xxxxxxx (ASCII)
+         emit(codepoint);
+     }
+     else if (codepoint <= 0x7ff)
+     {
+         // 2-byte characters: 110xxxxx 10xxxxxx
+         emit(0xC0 | ((codepoint >> 6) & 0x1F));
+         emit(0x80 | (codepoint & 0x3F));
+     }
+     else if (codepoint <= 0xffff)
+     {
+         // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx
+         emit(0xE0 | ((codepoint >> 12) & 0x0F));
+         emit(0x80 | ((codepoint >> 6) & 0x3F));
+         emit(0x80 | (codepoint & 0x3F));
+     }
+     else
+     {
+         // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+         emit(0xF0 | ((codepoint >> 18) & 0x07));
+         emit(0x80 | ((codepoint >> 12) & 0x3F));
+         emit(0x80 | ((codepoint >> 6) & 0x3F));
+         emit(0x80 | (codepoint & 0x3F));
+     }
+
+     return result;
+ }
+
+ /// return name of values of type token_type
+ static std::string token_type_name(token_type t)
+ {
+ switch (t)
+ {
+ case (token_type::uninitialized):
+ return "<uninitialized>";
+ case (token_type::literal_true):
+ return "true literal";
+ case (token_type::literal_false):
+ return "false literal";
+ case (token_type::literal_null):
+ return "null literal";
+ case (token_type::value_string):
+ return "string literal";
+ case (token_type::value_number):
+ return "number literal";
+ case (token_type::begin_array):
+ return "[";
+ case (token_type::begin_object):
+ return "{";
+ case (token_type::end_array):
+ return "]";
+ case (token_type::end_object):
+ return "}";
+ case (token_type::name_separator):
+ return ":";
+ case (token_type::value_separator):
+ return ",";
+ case (token_type::end_of_input):
+ return "<end of input>";
+ default:
+ return "<parse error>";
+ }
+ }
+
+ /*!
+ This function implements a scanner for JSON. It is specified using
+ regular expressions that try to follow RFC 7159 as closely as possible.
+ These regular expressions are then translated into a deterministic
+ finite automaton (DFA) by the tool re2c <http://re2c.org>. As a result,
+ the translated code for this function consists of a large block of code
+ with goto jumps.
+
+ On return, [m_start, m_cursor) delimits the text of the recognized token.
+ Whitespace (tab, LF, CR, space) before a token is skipped by re-entering
+ scan().
+
+ @return the class of the next token read from the buffer
+ */
+ token_type scan() noexcept
+ {
+ // pointer for backtracking information
+ m_marker = nullptr;
+
+ // remember the begin of the token
+ m_start = m_cursor;
+
+
+ {
+ lexer_char_t yych;
+ // selects the token returned when the automaton backtracks to m_marker
+ // (see basic_json_parser_32): 0 -> parse_error, otherwise value_number
+ unsigned int yyaccept = 0;
+ // character class bitmap indexed by character value:
+ //   bit 32  - tab, LF, CR and space (whitespace loop)
+ //   bit 64  - bytes >= 0x10 other than '"' and '\\' (string body loop)
+ //   bit 128 - digits '0'..'9' (integer digit loop)
+ static const unsigned char yybm[] =
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 32, 32, 0, 0, 32, 0, 0,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 96, 64, 0, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 0, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ };
+
+ // request more input when fewer than 5 characters remain buffered
+ if ((m_limit - m_cursor) < 5)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ // dispatch on the first character of the token
+ yych = *m_cursor;
+ if (yych <= '9')
+ {
+ if (yych <= ' ')
+ {
+ if (yych <= '\n')
+ {
+ if (yych <= 0x00)
+ {
+ goto basic_json_parser_27;
+ }
+ if (yych <= 0x08)
+ {
+ goto basic_json_parser_29;
+ }
+ if (yych >= '\n')
+ {
+ goto basic_json_parser_4;
+ }
+ }
+ else
+ {
+ if (yych == '\r')
+ {
+ goto basic_json_parser_2;
+ }
+ if (yych <= 0x1F)
+ {
+ goto basic_json_parser_29;
+ }
+ }
+ }
+ else
+ {
+ if (yych <= ',')
+ {
+ if (yych == '"')
+ {
+ goto basic_json_parser_26;
+ }
+ if (yych <= '+')
+ {
+ goto basic_json_parser_29;
+ }
+ goto basic_json_parser_14;
+ }
+ else
+ {
+ if (yych <= '-')
+ {
+ goto basic_json_parser_22;
+ }
+ if (yych <= '/')
+ {
+ goto basic_json_parser_29;
+ }
+ if (yych <= '0')
+ {
+ goto basic_json_parser_23;
+ }
+ goto basic_json_parser_25;
+ }
+ }
+ }
+ else
+ {
+ if (yych <= 'm')
+ {
+ if (yych <= '\\')
+ {
+ if (yych <= ':')
+ {
+ goto basic_json_parser_16;
+ }
+ if (yych == '[')
+ {
+ goto basic_json_parser_6;
+ }
+ goto basic_json_parser_29;
+ }
+ else
+ {
+ if (yych <= ']')
+ {
+ goto basic_json_parser_8;
+ }
+ if (yych == 'f')
+ {
+ goto basic_json_parser_21;
+ }
+ goto basic_json_parser_29;
+ }
+ }
+ else
+ {
+ if (yych <= 'z')
+ {
+ if (yych <= 'n')
+ {
+ goto basic_json_parser_18;
+ }
+ if (yych == 't')
+ {
+ goto basic_json_parser_20;
+ }
+ goto basic_json_parser_29;
+ }
+ else
+ {
+ if (yych <= '{')
+ {
+ goto basic_json_parser_10;
+ }
+ if (yych == '}')
+ {
+ goto basic_json_parser_12;
+ }
+ goto basic_json_parser_29;
+ }
+ }
+ }
+ // whitespace: tab, space and CR reach this label, LF enters at label 4
+basic_json_parser_2:
+ ++m_cursor;
+ yych = *m_cursor;
+ goto basic_json_parser_5;
+ // whitespace consumed: scan again from the current cursor position
+basic_json_parser_3:
+ {
+ return scan();
+ }
+ // loop over a run of whitespace characters
+basic_json_parser_4:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+basic_json_parser_5:
+ if (yybm[0 + yych] & 32)
+ {
+ goto basic_json_parser_4;
+ }
+ goto basic_json_parser_3;
+ // '['
+basic_json_parser_6:
+ ++m_cursor;
+ {
+ return token_type::begin_array;
+ }
+ // ']'
+basic_json_parser_8:
+ ++m_cursor;
+ {
+ return token_type::end_array;
+ }
+ // '{'
+basic_json_parser_10:
+ ++m_cursor;
+ {
+ return token_type::begin_object;
+ }
+ // '}'
+basic_json_parser_12:
+ ++m_cursor;
+ {
+ return token_type::end_object;
+ }
+ // ','
+basic_json_parser_14:
+ ++m_cursor;
+ {
+ return token_type::value_separator;
+ }
+ // ':'
+basic_json_parser_16:
+ ++m_cursor;
+ {
+ return token_type::name_separator;
+ }
+ // 'n' read: continue with "ull" at label 59
+basic_json_parser_18:
+ yyaccept = 0;
+ yych = *(m_marker = ++m_cursor);
+ if (yych == 'u')
+ {
+ goto basic_json_parser_59;
+ }
+ // no rule matched
+basic_json_parser_19:
+ {
+ return token_type::parse_error;
+ }
+ // 't' read: continue with "rue" at label 55
+basic_json_parser_20:
+ yyaccept = 0;
+ yych = *(m_marker = ++m_cursor);
+ if (yych == 'r')
+ {
+ goto basic_json_parser_55;
+ }
+ goto basic_json_parser_19;
+ // 'f' read: continue with "alse" at label 50
+basic_json_parser_21:
+ yyaccept = 0;
+ yych = *(m_marker = ++m_cursor);
+ if (yych == 'a')
+ {
+ goto basic_json_parser_50;
+ }
+ goto basic_json_parser_19;
+ // '-' read: a digit must follow
+basic_json_parser_22:
+ yych = *++m_cursor;
+ if (yych <= '/')
+ {
+ goto basic_json_parser_19;
+ }
+ if (yych <= '0')
+ {
+ goto basic_json_parser_49;
+ }
+ if (yych <= '9')
+ {
+ goto basic_json_parser_40;
+ }
+ goto basic_json_parser_19;
+ // leading '0': only a fraction or an exponent may follow within the number
+basic_json_parser_23:
+ yyaccept = 1;
+ yych = *(m_marker = ++m_cursor);
+ if (yych <= 'D')
+ {
+ if (yych == '.')
+ {
+ goto basic_json_parser_42;
+ }
+ }
+ else
+ {
+ if (yych <= 'E')
+ {
+ goto basic_json_parser_43;
+ }
+ if (yych == 'e')
+ {
+ goto basic_json_parser_43;
+ }
+ }
+ // a complete number was recognized
+basic_json_parser_24:
+ {
+ return token_type::value_number;
+ }
+ // leading digit '1'..'9'
+basic_json_parser_25:
+ yyaccept = 1;
+ yych = *(m_marker = ++m_cursor);
+ goto basic_json_parser_41;
+ // opening '"' of a string
+basic_json_parser_26:
+ yyaccept = 0;
+ yych = *(m_marker = ++m_cursor);
+ if (yych <= 0x0F)
+ {
+ goto basic_json_parser_19;
+ }
+ goto basic_json_parser_31;
+ // NUL byte: end of the input buffer
+basic_json_parser_27:
+ ++m_cursor;
+ {
+ return token_type::end_of_input;
+ }
+ // any other character cannot start a token
+basic_json_parser_29:
+ yych = *++m_cursor;
+ goto basic_json_parser_19;
+ // loop over the characters of a string body
+basic_json_parser_30:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+basic_json_parser_31:
+ if (yybm[0 + yych] & 64)
+ {
+ goto basic_json_parser_30;
+ }
+ // remaining cases: byte below 0x10 (reject), '"' (end) or '\\' (escape)
+ if (yych <= 0x0F)
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych <= '"')
+ {
+ goto basic_json_parser_34;
+ }
+ goto basic_json_parser_33;
+ // backtrack to the last marker and return the token selected by yyaccept
+basic_json_parser_32:
+ m_cursor = m_marker;
+ if (yyaccept == 0)
+ {
+ goto basic_json_parser_19;
+ }
+ else
+ {
+ goto basic_json_parser_24;
+ }
+ // character after a backslash: one of " / \ b f n r t, or u (label 36)
+basic_json_parser_33:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= 'e')
+ {
+ if (yych <= '/')
+ {
+ if (yych == '"')
+ {
+ goto basic_json_parser_30;
+ }
+ if (yych <= '.')
+ {
+ goto basic_json_parser_32;
+ }
+ goto basic_json_parser_30;
+ }
+ else
+ {
+ if (yych <= '\\')
+ {
+ if (yych <= '[')
+ {
+ goto basic_json_parser_32;
+ }
+ goto basic_json_parser_30;
+ }
+ else
+ {
+ if (yych == 'b')
+ {
+ goto basic_json_parser_30;
+ }
+ goto basic_json_parser_32;
+ }
+ }
+ }
+ else
+ {
+ if (yych <= 'q')
+ {
+ if (yych <= 'f')
+ {
+ goto basic_json_parser_30;
+ }
+ if (yych == 'n')
+ {
+ goto basic_json_parser_30;
+ }
+ goto basic_json_parser_32;
+ }
+ else
+ {
+ if (yych <= 's')
+ {
+ if (yych <= 'r')
+ {
+ goto basic_json_parser_30;
+ }
+ goto basic_json_parser_32;
+ }
+ else
+ {
+ if (yych <= 't')
+ {
+ goto basic_json_parser_30;
+ }
+ if (yych <= 'u')
+ {
+ goto basic_json_parser_36;
+ }
+ goto basic_json_parser_32;
+ }
+ }
+ }
+ // closing '"' of a string
+basic_json_parser_34:
+ ++m_cursor;
+ {
+ return token_type::value_string;
+ }
+ // first of the four hexadecimal digits after "\u"
+basic_json_parser_36:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= '@')
+ {
+ if (yych <= '/')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych >= ':')
+ {
+ goto basic_json_parser_32;
+ }
+ }
+ else
+ {
+ if (yych <= 'F')
+ {
+ goto basic_json_parser_37;
+ }
+ if (yych <= '`')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych >= 'g')
+ {
+ goto basic_json_parser_32;
+ }
+ }
+ // second hexadecimal digit
+basic_json_parser_37:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= '@')
+ {
+ if (yych <= '/')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych >= ':')
+ {
+ goto basic_json_parser_32;
+ }
+ }
+ else
+ {
+ if (yych <= 'F')
+ {
+ goto basic_json_parser_38;
+ }
+ if (yych <= '`')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych >= 'g')
+ {
+ goto basic_json_parser_32;
+ }
+ }
+ // third hexadecimal digit
+basic_json_parser_38:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= '@')
+ {
+ if (yych <= '/')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych >= ':')
+ {
+ goto basic_json_parser_32;
+ }
+ }
+ else
+ {
+ if (yych <= 'F')
+ {
+ goto basic_json_parser_39;
+ }
+ if (yych <= '`')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych >= 'g')
+ {
+ goto basic_json_parser_32;
+ }
+ }
+ // fourth hexadecimal digit, then back to the string body loop
+basic_json_parser_39:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= '@')
+ {
+ if (yych <= '/')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych <= '9')
+ {
+ goto basic_json_parser_30;
+ }
+ goto basic_json_parser_32;
+ }
+ else
+ {
+ if (yych <= 'F')
+ {
+ goto basic_json_parser_30;
+ }
+ if (yych <= '`')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych <= 'f')
+ {
+ goto basic_json_parser_30;
+ }
+ goto basic_json_parser_32;
+ }
+ // loop over the digits of the integer part
+basic_json_parser_40:
+ yyaccept = 1;
+ m_marker = ++m_cursor;
+ if ((m_limit - m_cursor) < 3)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+basic_json_parser_41:
+ if (yybm[0 + yych] & 128)
+ {
+ goto basic_json_parser_40;
+ }
+ if (yych <= 'D')
+ {
+ if (yych != '.')
+ {
+ goto basic_json_parser_24;
+ }
+ }
+ else
+ {
+ if (yych <= 'E')
+ {
+ goto basic_json_parser_43;
+ }
+ if (yych == 'e')
+ {
+ goto basic_json_parser_43;
+ }
+ goto basic_json_parser_24;
+ }
+ // '.' read: at least one fraction digit must follow
+basic_json_parser_42:
+ yych = *++m_cursor;
+ if (yych <= '/')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych <= '9')
+ {
+ goto basic_json_parser_47;
+ }
+ goto basic_json_parser_32;
+ // 'e' or 'E' read: optional sign, then exponent digits
+basic_json_parser_43:
+ yych = *++m_cursor;
+ if (yych <= ',')
+ {
+ if (yych != '+')
+ {
+ goto basic_json_parser_32;
+ }
+ }
+ else
+ {
+ if (yych <= '-')
+ {
+ goto basic_json_parser_44;
+ }
+ if (yych <= '/')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych <= '9')
+ {
+ goto basic_json_parser_45;
+ }
+ goto basic_json_parser_32;
+ }
+ // exponent sign read: a digit must follow
+basic_json_parser_44:
+ yych = *++m_cursor;
+ if (yych <= '/')
+ {
+ goto basic_json_parser_32;
+ }
+ if (yych >= ':')
+ {
+ goto basic_json_parser_32;
+ }
+ // loop over the exponent digits
+basic_json_parser_45:
+ ++m_cursor;
+ if (m_limit <= m_cursor)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= '/')
+ {
+ goto basic_json_parser_24;
+ }
+ if (yych <= '9')
+ {
+ goto basic_json_parser_45;
+ }
+ goto basic_json_parser_24;
+ // loop over the fraction digits
+basic_json_parser_47:
+ yyaccept = 1;
+ m_marker = ++m_cursor;
+ if ((m_limit - m_cursor) < 3)
+ {
+ yyfill(); // LCOV_EXCL_LINE;
+ }
+ yych = *m_cursor;
+ if (yych <= 'D')
+ {
+ if (yych <= '/')
+ {
+ goto basic_json_parser_24;
+ }
+ if (yych <= '9')
+ {
+ goto basic_json_parser_47;
+ }
+ goto basic_json_parser_24;
+ }
+ else
+ {
+ if (yych <= 'E')
+ {
+ goto basic_json_parser_43;
+ }
+ if (yych == 'e')
+ {
+ goto basic_json_parser_43;
+ }
+ goto basic_json_parser_24;
+ }
+ // "-0" read: only a fraction or an exponent may follow within the number
+basic_json_parser_49:
+ yyaccept = 1;
+ yych = *(m_marker = ++m_cursor);
+ if (yych <= 'D')
+ {
+ if (yych == '.')
+ {
+ goto basic_json_parser_42;
+ }
+ goto basic_json_parser_24;
+ }
+ else
+ {
+ if (yych <= 'E')
+ {
+ goto basic_json_parser_43;
+ }
+ if (yych == 'e')
+ {
+ goto basic_json_parser_43;
+ }
+ goto basic_json_parser_24;
+ }
+ // "fa" read: match the remaining "lse"
+basic_json_parser_50:
+ yych = *++m_cursor;
+ if (yych != 'l')
+ {
+ goto basic_json_parser_32;
+ }
+ yych = *++m_cursor;
+ if (yych != 's')
+ {
+ goto basic_json_parser_32;
+ }
+ yych = *++m_cursor;
+ if (yych != 'e')
+ {
+ goto basic_json_parser_32;
+ }
+ ++m_cursor;
+ {
+ return token_type::literal_false;
+ }
+ // "tr" read: match the remaining "ue"
+basic_json_parser_55:
+ yych = *++m_cursor;
+ if (yych != 'u')
+ {
+ goto basic_json_parser_32;
+ }
+ yych = *++m_cursor;
+ if (yych != 'e')
+ {
+ goto basic_json_parser_32;
+ }
+ ++m_cursor;
+ {
+ return token_type::literal_true;
+ }
+ // "nu" read: match the remaining "ll"
+basic_json_parser_59:
+ yych = *++m_cursor;
+ if (yych != 'l')
+ {
+ goto basic_json_parser_32;
+ }
+ yych = *++m_cursor;
+ if (yych != 'l')
+ {
+ goto basic_json_parser_32;
+ }
+ ++m_cursor;
+ {
+ return token_type::literal_null;
+ }
+ }
+
+
+ }
+
+ /*!
+ @brief append data from the stream to the internal buffer
+
+ Drops everything in front of the current token, appends the next line of
+ the stream (preceded by a newline character) and re-bases all scan pointers
+ onto the possibly reallocated buffer. Does nothing if the lexer has no
+ stream or the stream is in a failed state.
+ */
+ void yyfill() noexcept
+ {
+     if (not m_stream or not * m_stream)
+     {
+         return;
+     }
+
+     // remember all positions relative to the token start; the buffer may
+     // be reallocated below
+     const auto offset_start = m_start - m_content;
+     const auto offset_cursor = m_cursor - m_start;
+
+     // scan() resets m_marker to nullptr at the start of every token, so
+     // it may be unset here; pointer arithmetic on a null pointer must be
+     // avoided
+     const bool has_marker = (m_marker != nullptr);
+     const auto offset_marker = has_marker ? (m_marker - m_start) : 0;
+
+     // discard the already consumed prefix and append the next line
+     m_buffer.erase(0, static_cast<size_t>(offset_start));
+     std::string line;
+     std::getline(*m_stream, line);
+     m_buffer += "\n" + line; // add line with newline symbol
+
+     // re-base the scan pointers onto the new buffer contents
+     m_content = reinterpret_cast<const lexer_char_t*>(m_buffer.c_str());
+     m_start = m_content;
+     m_marker = has_marker ? (m_start + offset_marker) : nullptr;
+     m_cursor = m_start + offset_cursor;
+     // NOTE(review): the constructors set m_limit one past the last
+     // character, here it addresses the last character - confirm intended
+     m_limit = m_start + m_buffer.size() - 1;
+ }
+
+ /// return the raw text [m_start, m_cursor) of the last read token
+ string_t get_token() const noexcept
+ {
+     const auto* const first = reinterpret_cast<typename string_t::const_pointer>(m_start);
+     const auto length = static_cast<size_t>(m_cursor - m_start);
+     return string_t(first, length);
+ }
+
+ /*!
+ @brief return string value for string tokens
+
+ The function iterates the characters between the opening and closing
+ quotes of the string value. The complete string is the range
+ [m_start,m_cursor). Consequently, we iterate from m_start+1 to
+ m_cursor-1.
+
+ We differentiate two cases:
+
+ 1. Escaped characters. In this case, a new character is constructed
+    according to the nature of the escape. Some escapes create new
+    characters (e.g., @c "\\n" is replaced by @c "\n"), some are copied
+    as is (e.g., @c "\\\\"). Furthermore, Unicode escapes of the shape
+    @c "\\uxxxx" need special care. In this case, to_unicode takes care
+    of the construction of the values.
+ 2. Unescaped characters are copied as is.
+
+ @return string value of current token without opening and closing quotes
+ @throw std::out_of_range if to_unicode fails
+ @throw std::invalid_argument if a high surrogate escape is not followed by
+        a second @c "\\uyyyy" escape
+ */
+ string_t get_string() const
+ {
+     // parse the four hexadecimal digits starting at p
+     const auto read_hex4 = [](const lexer_char_t* p)
+     {
+         return std::strtoul(std::string(reinterpret_cast<typename string_t::const_pointer>(p),
+                                         4).c_str(), nullptr, 16);
+     };
+
+     string_t result;
+     result.reserve(static_cast<size_t>(m_cursor - m_start - 2));
+
+     // iterate the result between the quotes
+     for (const lexer_char_t* i = m_start + 1; i < m_cursor - 1; ++i)
+     {
+         if (*i != '\\')
+         {
+             // unescaped characters are just copied to the end of the
+             // string
+             result.append(1, static_cast<typename string_t::value_type>(*i));
+             continue;
+         }
+
+         // process escaped characters: read the character after the
+         // backslash
+         ++i;
+
+         switch (*i)
+         {
+             // the default escapes
+             case 't':
+             {
+                 result += "\t";
+                 break;
+             }
+             case 'b':
+             {
+                 result += "\b";
+                 break;
+             }
+             case 'f':
+             {
+                 result += "\f";
+                 break;
+             }
+             case 'n':
+             {
+                 result += "\n";
+                 break;
+             }
+             case 'r':
+             {
+                 result += "\r";
+                 break;
+             }
+             case '\\':
+             {
+                 result += "\\";
+                 break;
+             }
+             case '/':
+             {
+                 result += "/";
+                 break;
+             }
+             case '"':
+             {
+                 result += "\"";
+                 break;
+             }
+
+             // unicode
+             case 'u':
+             {
+                 // get code xxxx from uxxxx; i points at the 'u'
+                 const auto codepoint = read_hex4(i + 1);
+
+                 // check if codepoint is a high surrogate
+                 if (codepoint >= 0xD800 and codepoint <= 0xDBFF)
+                 {
+                     // make sure there is a subsequent unicode
+                     if ((i + 6 >= m_limit) or * (i + 5) != '\\' or * (i + 6) != 'u')
+                     {
+                         throw std::invalid_argument("missing low surrogate");
+                     }
+
+                     // get code yyyy from uxxxx\uyyyy
+                     const auto codepoint2 = read_hex4(i + 7);
+                     result += to_unicode(codepoint, codepoint2);
+                     // skip the next 10 characters (xxxx\uyyyy) so that i
+                     // rests on the last 'y'; the loop increment then moves
+                     // to the first character after the pair (skipping 11
+                     // would silently drop that character)
+                     i += 10;
+                 }
+                 else
+                 {
+                     // add unicode character(s)
+                     result += to_unicode(codepoint);
+                     // skip the next four characters (xxxx)
+                     i += 4;
+                 }
+                 break;
+             }
+
+             // the lexer only accepts the escapes handled above
+             default:
+             {
+                 break;
+             }
+         }
+     }
+
+     return result;
+ }
+
+ /*!
+ @brief return number value for number tokens
+
+ The text starting at m_start is handed to std::strtold, which reports the
+ first character it did not consume. The conversion is only accepted when
+ that position coincides with m_cursor, i.e. when std::strtold consumed
+ exactly the characters of the current token. A mismatch occurs for inputs
+ like "01", which the lexer treats as number 0 followed by number 1.
+
+ @return the converted value, or NAN if the conversion did not end exactly
+ at the end of the current token. The latter case needs to be treated by
+ the caller function.
+ */
+ long double get_number() const
+ {
+     const auto* const first = reinterpret_cast<typename string_t::const_pointer>(m_start);
+
+     // set by std::strtold to the first unconverted character
+     typename string_t::value_type* last;
+     const auto float_val = std::strtold(first, &last);
+
+     // reject conversions that stopped before or after the token end
+     if (reinterpret_cast<lexer_char_t*>(last) != m_cursor)
+     {
+         return NAN;
+     }
+     return float_val;
+ }
+
+ private:
+ /// optional input stream (null when lexing a string or when the lexer was
+ /// default-constructed; yyfill() tests this pointer before using it)
+ std::istream* m_stream = nullptr;
+ /// the buffer
+ string_t m_buffer;
+ /// the buffer pointer
+ const lexer_char_t* m_content = nullptr;
+ /// pointer to the beginning of the current symbol
+ const lexer_char_t* m_start = nullptr;
+ /// pointer for backtracking information
+ const lexer_char_t* m_marker = nullptr;
+ /// pointer to the current symbol
+ const lexer_char_t* m_cursor = nullptr;
+ /// pointer to the end of the buffer
+ const lexer_char_t* m_limit = nullptr;
+ };
+
+ /*!
+ @brief syntax analysis
+
+ Recursive-descent parser on top of @ref lexer. The parser always holds one
+ token of lookahead in last_token. If a callback is installed it is invoked
+ for container starts/ends, object keys and values; a callback returning
+ false discards the corresponding value. The depth passed to the callback is
+ incremented when a container start is reported and decremented again when
+ that container is left, whether or not the container was kept.
+ */
+ class parser
+ {
+   public:
+     /// constructor for strings
+     parser(const string_t& s, parser_callback_t cb = nullptr)
+         : callback(cb), m_lexer(s)
+     {
+         // read first token
+         get_token();
+     }
+
+     /// a parser reading from an input stream
+     parser(std::istream& _is, parser_callback_t cb = nullptr)
+         : callback(cb), m_lexer(&_is)
+     {
+         // read first token
+         get_token();
+     }
+
+     /*!
+     @brief public parser interface
+     @return the parsed value; null if the callback discarded the top-level
+             value
+     @throw std::invalid_argument on a parse error or if input remains after
+            the top-level value
+     */
+     basic_json parse()
+     {
+         basic_json result = parse_internal(true);
+
+         expect(lexer::token_type::end_of_input);
+
+         // return parser result and replace it with null in case the
+         // top-level value was discarded by the callback function
+         return result.is_discarded() ? basic_json() : result;
+     }
+
+   private:
+     /*!
+     @brief the actual parser
+     @param[in] keep whether the value about to be parsed is still wanted;
+                false inside a subtree that has already been discarded
+     @return the parsed value, or a discarded value
+     */
+     basic_json parse_internal(bool keep)
+     {
+         auto result = basic_json(value_t::discarded);
+
+         switch (last_token)
+         {
+             case (lexer::token_type::begin_object):
+             {
+                 // the start event is reported (and depth incremented)
+                 // exactly when the value is wanted and a callback exists
+                 const bool announced = keep and callback;
+                 if (keep and (not callback or (keep = callback(depth++, parse_event_t::object_start, result))))
+                 {
+                     // explicitly set result to object to cope with {}
+                     result.m_type = value_t::object;
+                     result.m_value = json_value(value_t::object);
+                 }
+
+                 // read next token
+                 get_token();
+
+                 // closing } -> we are done
+                 if (last_token == lexer::token_type::end_object)
+                 {
+                     get_token();
+                     leave_container(announced, keep, parse_event_t::object_end, result);
+                     return result;
+                 }
+
+                 // no comma is expected here
+                 unexpect(lexer::token_type::value_separator);
+
+                 // otherwise: parse key-value pairs
+                 do
+                 {
+                     // ugly, but could be fixed with loop reorganization
+                     if (last_token == lexer::token_type::value_separator)
+                     {
+                         get_token();
+                     }
+
+                     // store key
+                     expect(lexer::token_type::value_string);
+                     const auto key = m_lexer.get_string();
+
+                     // ask the callback (if any) whether this key is wanted
+                     bool keep_tag = false;
+                     if (keep)
+                     {
+                         if (callback)
+                         {
+                             basic_json k(key);
+                             keep_tag = callback(depth, parse_event_t::key, k);
+                         }
+                         else
+                         {
+                             keep_tag = true;
+                         }
+                     }
+
+                     // parse separator (:)
+                     get_token();
+                     expect(lexer::token_type::name_separator);
+
+                     // parse and add value
+                     get_token();
+                     auto value = parse_internal(keep);
+                     if (keep and keep_tag and not value.is_discarded())
+                     {
+                         result[key] = std::move(value);
+                     }
+                 }
+                 while (last_token == lexer::token_type::value_separator);
+
+                 // closing }
+                 expect(lexer::token_type::end_object);
+                 get_token();
+                 leave_container(announced, keep, parse_event_t::object_end, result);
+
+                 return result;
+             }
+
+             case (lexer::token_type::begin_array):
+             {
+                 // see begin_object for the meaning of announced
+                 const bool announced = keep and callback;
+                 if (keep and (not callback or (keep = callback(depth++, parse_event_t::array_start, result))))
+                 {
+                     // explicitly set result to array to cope with []
+                     result.m_type = value_t::array;
+                     result.m_value = json_value(value_t::array);
+                 }
+
+                 // read next token
+                 get_token();
+
+                 // closing ] -> we are done
+                 if (last_token == lexer::token_type::end_array)
+                 {
+                     get_token();
+                     leave_container(announced, keep, parse_event_t::array_end, result);
+                     return result;
+                 }
+
+                 // no comma is expected here
+                 unexpect(lexer::token_type::value_separator);
+
+                 // otherwise: parse values
+                 do
+                 {
+                     // ugly, but could be fixed with loop reorganization
+                     if (last_token == lexer::token_type::value_separator)
+                     {
+                         get_token();
+                     }
+
+                     // parse value
+                     auto value = parse_internal(keep);
+                     if (keep and not value.is_discarded())
+                     {
+                         result.push_back(std::move(value));
+                     }
+                 }
+                 while (last_token == lexer::token_type::value_separator);
+
+                 // closing ]
+                 expect(lexer::token_type::end_array);
+                 get_token();
+                 leave_container(announced, keep, parse_event_t::array_end, result);
+
+                 return result;
+             }
+
+             case (lexer::token_type::literal_null):
+             {
+                 get_token();
+                 result.m_type = value_t::null;
+                 break;
+             }
+
+             case (lexer::token_type::value_string):
+             {
+                 const auto s = m_lexer.get_string();
+                 get_token();
+                 result = basic_json(s);
+                 break;
+             }
+
+             case (lexer::token_type::literal_true):
+             {
+                 get_token();
+                 result.m_type = value_t::boolean;
+                 result.m_value = true;
+                 break;
+             }
+
+             case (lexer::token_type::literal_false):
+             {
+                 get_token();
+                 result.m_type = value_t::boolean;
+                 result.m_value = false;
+                 break;
+             }
+
+             case (lexer::token_type::value_number):
+             {
+                 auto float_val = m_lexer.get_number();
+
+                 // NAN is returned if token could not be translated
+                 // completely
+                 if (std::isnan(float_val))
+                 {
+                     throw std::invalid_argument(std::string("parse error - ") +
+                                                 m_lexer.get_token() + " is not a number");
+                 }
+
+                 get_token();
+
+                 // check if conversion loses precision
+                 // NOTE(review): the cast is not range-checked; confirm how
+                 // values outside number_integer_t should be handled
+                 const auto int_val = static_cast<number_integer_t>(float_val);
+                 if (approx(float_val, static_cast<long double>(int_val)))
+                 {
+                     // we would not lose precision -> return int
+                     result.m_type = value_t::number_integer;
+                     result.m_value = int_val;
+                 }
+                 else
+                 {
+                     // we would lose precision -> return float
+                     result.m_type = value_t::number_float;
+                     result.m_value = static_cast<number_float_t>(float_val);
+                 }
+                 break;
+             }
+
+             default:
+             {
+                 // the last token was unexpected
+                 unexpect(last_token);
+             }
+         }
+
+         if (keep and callback and not callback(depth, parse_event_t::value, result))
+         {
+             result = basic_json(value_t::discarded);
+         }
+         return result;
+     }
+
+     /*!
+     @brief finish an object or array
+
+     Restores depth if the container's start event was reported and, if the
+     container is still kept, reports its end event. Containers inside a
+     discarded subtree (start never reported) trigger no callback and leave
+     depth untouched.
+
+     @param[in] announced whether the start event was reported to the callback
+     @param[in] keep whether the container is still wanted
+     @param[in] event the end event to report (object_end or array_end)
+     @param[in,out] result the container; replaced by a discarded value if the
+                    callback rejects it
+     */
+     void leave_container(const bool announced, const bool keep,
+                          const parse_event_t event, basic_json& result)
+     {
+         if (not announced)
+         {
+             return;
+         }
+
+         // undo the increment performed when the start event was reported
+         --depth;
+         if (keep and not callback(depth, event, result))
+         {
+             result = basic_json(value_t::discarded);
+         }
+     }
+
+     /// get next token from lexer
+     typename lexer::token_type get_token()
+     {
+         last_token = m_lexer.scan();
+         return last_token;
+     }
+
+     /// throw std::invalid_argument unless the last read token is of type t
+     void expect(typename lexer::token_type t) const
+     {
+         if (t != last_token)
+         {
+             std::string error_msg = "parse error - unexpected \'";
+             error_msg += m_lexer.get_token();
+             error_msg += "\' (" + lexer::token_type_name(last_token);
+             error_msg += "); expected " + lexer::token_type_name(t);
+             throw std::invalid_argument(error_msg);
+         }
+     }
+
+     /// throw std::invalid_argument if the last read token is of type t
+     void unexpect(typename lexer::token_type t) const
+     {
+         if (t == last_token)
+         {
+             std::string error_msg = "parse error - unexpected \'";
+             error_msg += m_lexer.get_token();
+             error_msg += "\' (";
+             error_msg += lexer::token_type_name(last_token) + ")";
+             throw std::invalid_argument(error_msg);
+         }
+     }
+
+   private:
+     /// current level of recursion
+     int depth = 0;
+     /// callback function
+     parser_callback_t callback;
+     /// the type of the last read token
+     typename lexer::token_type last_token = lexer::token_type::uninitialized;
+     /// the lexer
+     lexer m_lexer;
+ };
+};
+
+
+/////////////
+// presets //
+/////////////
+
+/*!
+@brief default JSON class
+
+This type is the default specialization of the @ref basic_json class which uses
+the standard template types, i.e. every template parameter of basic_json is
+left at its default.
+*/
+using json = basic_json<>;
+}
+
+
+/////////////////////////
+// nonmember functions //
+/////////////////////////
+
+// specializations of std::swap and std::hash for nlohmann::json
+namespace std
+{
+/*!
+@brief exchanges the values of two JSON objects
+
+Forwards to the member function nlohmann::json::swap.
+*/
+template <>
+inline void swap(nlohmann::json& j1,
+                 nlohmann::json& j2) noexcept(
+                     is_nothrow_move_constructible<nlohmann::json>::value and
+                     is_nothrow_move_assignable<nlohmann::json>::value
+                 )
+{
+    j1.swap(j2);
+}
+
+/// hash value for JSON objects
+template <>
+struct hash<nlohmann::json>
+{
+    /// return a hash value for a JSON object: the hash of its serialization
+    std::size_t operator()(const nlohmann::json& j) const
+    {
+        // a naive hashing via the string representation
+        const nlohmann::json::string_t text = j.dump();
+        return hash<nlohmann::json::string_t>()(text);
+    }
+};
+}
+
+/*!
+@brief user-defined string literal for JSON values
+
+This operator implements a user-defined string literal for JSON objects. It can
+be used by adding \p "_json" to a string literal and returns a JSON object if
+no parse error occurred. The literal's length argument is not used; the text is
+handed to the parser as a NUL-terminated string.
+
+@param[in] s a string representation of a JSON object
+@return a JSON object
+*/
+inline nlohmann::json operator "" _json(const char* s, std::size_t)
+{
+    using char_type = nlohmann::json::string_t::value_type;
+    const auto* const text = reinterpret_cast<const char_type*>(s);
+    return nlohmann::json::parse(text);
+}
+
+#endif
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "../common/OptionParser.h"
+#include "PbIndexDump.h"
+#include "PbIndexDumpVersion.h"
+#include "Settings.h"
+#include <cassert>
+#include <iostream>
+using namespace std;
+
+// Translates the parsed command line into pbindexdump settings. Problems
+// (more than one input file, JSON options on non-JSON output) are collected
+// in settings.errors_ instead of being thrown.
+static
+pbindexdump::Settings fromCommandLine(optparse::OptionParser& parser,
+                                      int argc,
+                                      char* argv[])
+{
+    const optparse::Values options = parser.parse_args(argc, argv);
+    pbindexdump::Settings settings;
+
+    // input: zero positional args means stdin, one names the PBI file
+    const vector<string> inputs = parser.args();
+    const size_t inputCount = inputs.size();
+    if (inputCount == 0) {
+        settings.inputPbiFilename_ = "-"; // stdin
+    } else if (inputCount == 1) {
+        settings.inputPbiFilename_ = inputs.front();
+    } else {
+        assert(inputCount > 1);
+        settings.errors_.push_back("pbindexdump does not support more than one input file per run");
+    }
+
+    // output format
+    if (options.is_set("format"))
+        settings.format_ = options["format"];
+
+    // JSON options are accepted for JSON output only
+    if (settings.format_ == "json") {
+        if (options.is_set("json_indent_level"))
+            settings.jsonIndentLevel_ = options.get("json_indent_level");
+        if (options.is_set("json_raw"))
+            settings.jsonRaw_ = options.get("json_raw");
+    } else if (options.is_set("json_indent_level") ||
+               options.is_set("json_raw")) {
+        settings.errors_.push_back("JSON formatting options not valid on non-JSON output");
+    }
+
+    return settings;
+}
+
+// Entry point of pbindexdump: declares the command line interface, converts
+// the arguments into settings and runs the dump. Returns EXIT_FAILURE if the
+// settings contain errors or if the run throws a std::exception.
+int main(int argc, char* argv[])
+{
+    // setup help & options
+    optparse::OptionParser parser;
+    parser.description("pbindexdump prints a human-readable view of PBI data to stdout.");
+    parser.prog("pbindexdump");
+    parser.usage("pbindexdump [options] [input]");
+    parser.version(pbindexdump::Version);
+    parser.add_version_option(true);
+    parser.add_help_option(true);
+
+    auto ioGroup = optparse::OptionGroup(parser, "Input/Output");
+    ioGroup.add_option("")
+           .dest("input")
+           .metavar("input")
+           .help("Input PBI file. If not provided, stdin will be used as input.");
+    ioGroup.add_option("--format")
+           .dest("format")
+           .metavar("STRING")
+           .help("Output format, one of:\n"
+                 " json, cpp\n\n"
+                 "json: pretty-printed JSON [default]\n\n"
+                 "cpp: copy/paste-able C++ code that can be used to construct the"
+                 " equivalent PacBio::BAM::PbiRawData object");
+    parser.add_option_group(ioGroup);
+
+    auto jsonGroup = optparse::OptionGroup(parser, "JSON Formatting");
+    jsonGroup.add_option("--json-indent-level")
+             .dest("json_indent_level")
+             .metavar("INT")
+             .help("JSON indent level [4]");
+    jsonGroup.add_option("--json-raw")
+             .dest("json_raw")
+             .action("store_true")
+             .help("Prints fields in a manner that more closely reflects the PBI"
+                   " file format - presenting data as per-field columns, not"
+                   " per-record objects.");
+    parser.add_option_group(jsonGroup);
+
+    // parse command line for settings
+    const pbindexdump::Settings settings = fromCommandLine(parser, argc, argv);
+    if (!settings.errors_.empty()) {
+        cerr << endl;
+        // iterate by reference: no need to copy each message
+        for (const auto& e : settings.errors_)
+            cerr << "ERROR: " << e << endl;
+        cerr << endl;
+        parser.print_help();
+        return EXIT_FAILURE;
+    }
+
+    // run tool
+    try {
+        pbindexdump::PbIndexDump::Run(settings);
+        return EXIT_SUCCESS;
+    }
+    catch (const std::exception& e) {
+        cerr << "ERROR: " << e.what() << endl;
+        return EXIT_FAILURE;
+    }
+}
--- /dev/null
+
+set(PbmergeSrcDir ${PacBioBAM_ToolsDir}/pbmerge/src)
+
+# generate PbMergeVersion.h, stamped with the PacBioBAM version
+set(PbMerge_VERSION ${PacBioBAM_VERSION})
+configure_file(
+    ${PbmergeSrcDir}/PbMergeVersion.h.in
+    ${GeneratedDir}/PbMergeVersion.h
+    @ONLY
+)
+
+# sources that make up the pbmerge executable
+set(PBMERGE_SOURCES
+    ${ToolsCommonDir}/BamFileMerger.h
+    ${ToolsCommonDir}/OptionParser.cpp
+    ${PbmergeSrcDir}/main.cpp
+)
+
+# build pbmerge executable
+include(PbbamTool)
+create_pbbam_tool(
+    TARGET  pbmerge
+    SOURCES ${PBMERGE_SOURCES}
+)
+
+# cram tests
+#
+# Only registered when tests are built and auto-validate is off: skipped under
+# auto-validate for now, til the merge tests are cleaned up for that mode, too.
+if (PacBioBAM_build_tests AND NOT PacBioBAM_auto_validate)
+
+    # instantiate each cram test template into the generated-files directory,
+    # collecting the resulting paths for the test command below
+    set(PbmergeCramTestFiles)
+    foreach(cramTest
+        pbmerge_pacbio_ordering
+        pbmerge_aligned_ordering
+        pbmerge_mixed_ordering
+        pbmerge_dataset
+        pbmerge_fofn
+    )
+        configure_file(
+            ${PacBioBAM_CramTestsDir}/${cramTest}.t.in
+            ${GeneratedDir}/${cramTest}.t
+            @ONLY
+        )
+        list(APPEND PbmergeCramTestFiles ${GeneratedDir}/${cramTest}.t)
+    endforeach()
+
+    # run all generated cram tests in a single cram.py invocation
+    add_test(
+        NAME pbmerge_CramTests
+        WORKING_DIRECTORY ${PacBioBAM_TestsDir}/scripts
+        COMMAND "python" cram.py ${PbmergeCramTestFiles}
+    )
+
+endif()
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#ifndef PBMERGEVERSION_H
+#define PBMERGEVERSION_H
+
+#include <string>
+
+namespace pbmerge {
+
+// Version string reported by the pbmerge tool. The @PbMerge_VERSION@
+// placeholder is expected to be substituted when the header is generated
+// from this template at configure time.
+const std::string Version("@PbMerge_VERSION@");
+
+} // namespace pbmerge
+
+#endif // PBMERGEVERSION_H
--- /dev/null
+// Copyright (c) 2015, Pacific Biosciences of California, Inc.
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted (subject to the limitations in the
+// disclaimer below) provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+//
+// * Neither the name of Pacific Biosciences nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
+// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// Author: Derek Barnett
+
+#include "../common/OptionParser.h"
+#include "../common/BamFileMerger.h"
+#include "PbMergeVersion.h"
+#include <cassert>
+#include <iostream>
+using namespace std;
+
+namespace pbmerge {
+
+// Command-line settings for the pbmerge tool.
+//
+// Instances are only created through FromCommandLine() (the constructor is
+// private). Problems found while interpreting the command line are collected
+// in errors_ rather than thrown, so callers should check errors_ before using
+// the other fields.
+class Settings
+{
+public:
+    // Parses the command line with the supplied parser and converts the
+    // result into a Settings object:
+    //
+    //   input  - all positional arguments; an error is recorded if none given
+    //   output - value of the "output" option, or "-" (stdout) if not set
+    //   PBI    - never created when writing to stdout; otherwise created
+    //            unless the "no_pbi" option disables it
+    static Settings FromCommandLine(optparse::OptionParser& parser,
+                                    int argc, char* argv[])
+    {
+        Settings settings;
+        const optparse::Values options = parser.parse_args(argc, argv);
+
+        // input
+        const vector<string> positionalArgs = parser.args();
+        if (positionalArgs.empty())
+            settings.errors_.push_back("at least one input file must be specified");
+        else
+            settings.inputFilenames_ = positionalArgs;
+
+        // output
+        if (options.is_set("output"))
+            settings.outputFilename_ = options["output"];
+        else
+            settings.outputFilename_ = "-"; // stdout
+
+        // PBI?
+        if (settings.outputFilename_ == "-")
+            settings.createPbi_ = false; // always skip PBI if writing to stdout
+        else {
+            if (options.is_set("no_pbi"))
+                settings.createPbi_ = !options.get("no_pbi"); // user-disabled
+            else
+                settings.createPbi_ = true; // not specified, go ahead and generate by default
+        }
+
+        return settings;
+    }
+
+public:
+    std::vector<std::string> inputFilenames_; // files named on the command line
+    std::string outputFilename_;              // output BAM filename, "-" for stdout
+    bool createPbi_;                          // whether a PBI index should be created
+    std::vector<std::string> errors_;         // problems found while parsing
+
+private:
+    // Give createPbi_ a defined value so a Settings object never carries an
+    // indeterminate bool; FromCommandLine() always overwrites it.
+    Settings(void) : createPbi_(true) { }
+};
+
+} // namespace pbmerge
+
+// Entry point of the pbmerge command-line tool.
+//
+// Registers the version/help options and the "Input/Output" option group,
+// turns the command line into a pbmerge::Settings, builds a DataSet from the
+// input filename(s), and merges it into the requested output via
+// PacBio::BAM::common::BamFileMerger::Merge().
+//
+// Returns EXIT_SUCCESS when the merge completes. Returns EXIT_FAILURE when
+// the settings carry any errors (each one is reported on stderr, then the
+// help text is printed) or when a std::exception is thrown while merging
+// (its message is reported on stderr).
+int main(int argc, char* argv[])
+{
+    // setup help & options
+    optparse::OptionParser parser;
+    parser.description("pbmerge merges PacBio BAM files. If the input is DataSetXML, "
+                       "any filters will be applied. If no output filename is specified, "
+                       "new BAM will be written to stdout."
+                       );
+    parser.prog("pbmerge");
+    parser.usage("pbmerge [options] [-o <out.bam>] <INPUT>");
+    parser.version(pbmerge::Version);
+    parser.add_version_option(true);
+    parser.add_help_option(true);
+
+    auto ioGroup = optparse::OptionGroup(parser, "Input/Output");
+    ioGroup.add_option("-o")
+           .dest("output")
+           .metavar("output")
+           .help("Output BAM filename. ");
+    ioGroup.add_option("--no-pbi")
+           .dest("no_pbi")
+           .action("store_true")
+           .help("Set this option to skip PBI index file creation. PBI creation is "
+                 "automatically skipped if no output filename is provided."
+                 );
+    // NOTE(review): the inputs themselves are taken from the parser's
+    // positional args in Settings::FromCommandLine(); this unnamed entry
+    // presumably exists only so the input is described in the help output.
+    ioGroup.add_option("")
+           .dest("input")
+           .metavar("INPUT")
+           .help("Input may be one of:\n"
+                 " DataSetXML, list of BAM files, or FOFN\n\n"
+                 " fofn: pbmerge -o merged.bam bams.fofn\n\n"
+                 " bams: pbmerge -o merged.bam 1.bam 2.bam 3.bam\n\n"
+                 " xml: pbmerge -o merged.bam foo.subreadset.xml\n\n"
+                 );
+    parser.add_option_group(ioGroup);
+
+    // parse command line for settings
+    const pbmerge::Settings settings = pbmerge::Settings::FromCommandLine(parser, argc, argv);
+    if (!settings.errors_.empty()) {
+        cerr << endl;
+        // iterate by const reference: no need to copy each message
+        for (const auto& e : settings.errors_)
+            cerr << "ERROR: " << e << endl;
+        cerr << endl;
+        parser.print_help();
+        return EXIT_FAILURE;
+    }
+
+    // run tool
+    try {
+        // setup our @PG entry to add to header
+        PacBio::BAM::ProgramInfo mergeProgram;
+        mergeProgram.Id(string("pbmerge-")+pbmerge::Version)
+                    .Name("pbmerge")
+                    .Version(pbmerge::Version);
+
+        // a single input goes through the DataSet(filename) constructor;
+        // several inputs go through the DataSet(filenames) constructor
+        PacBio::BAM::DataSet dataset;
+        if (settings.inputFilenames_.size() == 1)
+            dataset = PacBio::BAM::DataSet(settings.inputFilenames_.front());
+        else
+            dataset = PacBio::BAM::DataSet(settings.inputFilenames_);
+
+        PacBio::BAM::common::BamFileMerger::Merge(dataset,
+                                                  settings.outputFilename_,
+                                                  mergeProgram,
+                                                  settings.createPbi_);
+
+        return EXIT_SUCCESS;
+    }
+    catch (const std::exception& e) {
+        cerr << "ERROR: " << e.what() << endl;
+        return EXIT_FAILURE;
+    }
+}